1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
2 | /* |
3 | * SN Platform GRU Driver |
4 | * |
5 | * GRU DRIVER TABLES, MACROS, externs, etc |
6 | * |
7 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. |
8 | */ |
9 | |
10 | #ifndef __GRUTABLES_H__ |
11 | #define __GRUTABLES_H__ |
12 | |
13 | /* |
14 | * GRU Chiplet: |
 * The GRU is a user-addressable memory accelerator. It provides
 * several forms of load, store, memset, and bcopy instructions. In addition, it
17 | * contains special instructions for AMOs, sending messages to message |
18 | * queues, etc. |
19 | * |
20 | * The GRU is an integral part of the node controller. It connects |
21 | * directly to the cpu socket. In its current implementation, there are 2 |
22 | * GRU chiplets in the node controller on each blade (~node). |
23 | * |
24 | * The entire GRU memory space is fully coherent and cacheable by the cpus. |
25 | * |
26 | * Each GRU chiplet has a physical memory map that looks like the following: |
27 | * |
28 | * +-----------------+ |
29 | * |/////////////////| |
30 | * |/////////////////| |
31 | * |/////////////////| |
32 | * |/////////////////| |
33 | * |/////////////////| |
34 | * |/////////////////| |
35 | * |/////////////////| |
36 | * |/////////////////| |
37 | * +-----------------+ |
38 | * | system control | |
39 | * +-----------------+ _______ +-------------+ |
40 | * |/////////////////| / | | |
41 | * |/////////////////| / | | |
42 | * |/////////////////| / | instructions| |
43 | * |/////////////////| / | | |
44 | * |/////////////////| / | | |
45 | * |/////////////////| / |-------------| |
46 | * |/////////////////| / | | |
47 | * +-----------------+ | | |
48 | * | context 15 | | data | |
49 | * +-----------------+ | | |
50 | * | ...... | \ | | |
51 | * +-----------------+ \____________ +-------------+ |
52 | * | context 1 | |
53 | * +-----------------+ |
54 | * | context 0 | |
55 | * +-----------------+ |
56 | * |
57 | * Each of the "contexts" is a chunk of memory that can be mmaped into user |
58 | * space. The context consists of 2 parts: |
59 | * |
60 | * - an instruction space that can be directly accessed by the user |
61 | * to issue GRU instructions and to check instruction status. |
62 | * |
63 | * - a data area that acts as normal RAM. |
64 | * |
65 | * User instructions contain virtual addresses of data to be accessed by the |
66 | * GRU. The GRU contains a TLB that is used to convert these user virtual |
67 | * addresses to physical addresses. |
68 | * |
69 | * The "system control" area of the GRU chiplet is used by the kernel driver |
70 | * to manage user contexts and to perform functions such as TLB dropin and |
71 | * purging. |
72 | * |
73 | * One context may be reserved for the kernel and used for cross-partition |
74 | * communication. The GRU will also be used to asynchronously zero out |
75 | * large blocks of memory (not currently implemented). |
76 | * |
77 | * |
78 | * Tables: |
79 | * |
80 | * VDATA-VMA Data - Holds a few parameters. Head of linked list of |
81 | * GTS tables for threads using the GSEG |
82 | * GTS - Gru Thread State - contains info for managing a GSEG context. A |
83 | * GTS is allocated for each thread accessing a |
84 | * GSEG. |
85 | * GTD - GRU Thread Data - contains shadow copy of GRU data when GSEG is |
86 | * not loaded into a GRU |
87 | * GMS - GRU Memory Struct - Used to manage TLB shootdowns. Tracks GRUs |
88 | * where a GSEG has been loaded. Similar to |
89 | * an mm_struct but for GRU. |
90 | * |
91 | * GS - GRU State - Used to manage the state of a GRU chiplet |
92 | * BS - Blade State - Used to manage state of all GRU chiplets |
93 | * on a blade |
94 | * |
95 | * |
 * Normal task tables for a task using the GRU.
97 | * - 2 threads in process |
98 | * - 2 GSEGs open in process |
99 | * - GSEG1 is being used by both threads |
100 | * - GSEG2 is used only by thread 2 |
101 | * |
102 | * task -->| |
103 | * task ---+---> mm ->------ (notifier) -------+-> gms |
104 | * | | |
105 | * |--> vma -> vdata ---> gts--->| GSEG1 (thread1) |
106 | * | | | |
107 | * | +-> gts--->| GSEG1 (thread2) |
108 | * | | |
109 | * |--> vma -> vdata ---> gts--->| GSEG2 (thread2) |
110 | * . |
111 | * . |
112 | * |
113 | * GSEGs are marked DONTCOPY on fork |
114 | * |
115 | * At open |
116 | * file.private_data -> NULL |
117 | * |
118 | * At mmap, |
119 | * vma -> vdata |
120 | * |
121 | * After gseg reference |
122 | * vma -> vdata ->gts |
123 | * |
124 | * After fork |
125 | * parent |
126 | * vma -> vdata -> gts |
127 | * child |
128 | * (vma is not copied) |
129 | * |
130 | */ |
131 | |
132 | #include <linux/refcount.h> |
133 | #include <linux/rmap.h> |
134 | #include <linux/interrupt.h> |
135 | #include <linux/mutex.h> |
136 | #include <linux/wait.h> |
137 | #include <linux/mmu_notifier.h> |
138 | #include <linux/mm_types.h> |
139 | #include "gru.h" |
140 | #include "grulib.h" |
141 | #include "gruhandles.h" |
142 | |
143 | extern struct gru_stats_s gru_stats; |
144 | extern struct gru_blade_state *gru_base[]; |
145 | extern unsigned long gru_start_paddr, gru_end_paddr; |
146 | extern void *gru_start_vaddr; |
147 | extern unsigned int gru_max_gids; |
148 | |
149 | #define GRU_MAX_BLADES MAX_NUMNODES |
150 | #define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE) |
151 | |
152 | #define GRU_DRIVER_ID_STR "SGI GRU Device Driver" |
153 | #define GRU_DRIVER_VERSION_STR "0.85" |
154 | |
155 | /* |
156 | * GRU statistics. |
157 | */ |
158 | struct gru_stats_s { |
159 | atomic_long_t vdata_alloc; |
160 | atomic_long_t vdata_free; |
161 | atomic_long_t gts_alloc; |
162 | atomic_long_t gts_free; |
163 | atomic_long_t gms_alloc; |
164 | atomic_long_t gms_free; |
165 | atomic_long_t gts_double_allocate; |
166 | atomic_long_t assign_context; |
167 | atomic_long_t assign_context_failed; |
168 | atomic_long_t free_context; |
169 | atomic_long_t load_user_context; |
170 | atomic_long_t load_kernel_context; |
171 | atomic_long_t lock_kernel_context; |
172 | atomic_long_t unlock_kernel_context; |
173 | atomic_long_t steal_user_context; |
174 | atomic_long_t steal_kernel_context; |
175 | atomic_long_t steal_context_failed; |
176 | atomic_long_t nopfn; |
177 | atomic_long_t asid_new; |
178 | atomic_long_t asid_next; |
179 | atomic_long_t asid_wrap; |
180 | atomic_long_t asid_reuse; |
181 | atomic_long_t intr; |
182 | atomic_long_t intr_cbr; |
183 | atomic_long_t intr_tfh; |
184 | atomic_long_t intr_spurious; |
185 | atomic_long_t intr_mm_lock_failed; |
186 | atomic_long_t call_os; |
187 | atomic_long_t call_os_wait_queue; |
188 | atomic_long_t user_flush_tlb; |
189 | atomic_long_t user_unload_context; |
190 | atomic_long_t user_exception; |
191 | atomic_long_t set_context_option; |
192 | atomic_long_t check_context_retarget_intr; |
193 | atomic_long_t check_context_unload; |
194 | atomic_long_t tlb_dropin; |
195 | atomic_long_t tlb_preload_page; |
196 | atomic_long_t tlb_dropin_fail_no_asid; |
197 | atomic_long_t tlb_dropin_fail_upm; |
198 | atomic_long_t tlb_dropin_fail_invalid; |
199 | atomic_long_t tlb_dropin_fail_range_active; |
200 | atomic_long_t tlb_dropin_fail_idle; |
201 | atomic_long_t tlb_dropin_fail_fmm; |
202 | atomic_long_t tlb_dropin_fail_no_exception; |
203 | atomic_long_t tfh_stale_on_fault; |
204 | atomic_long_t mmu_invalidate_range; |
205 | atomic_long_t mmu_invalidate_page; |
206 | atomic_long_t flush_tlb; |
207 | atomic_long_t flush_tlb_gru; |
208 | atomic_long_t flush_tlb_gru_tgh; |
209 | atomic_long_t flush_tlb_gru_zero_asid; |
210 | |
211 | atomic_long_t copy_gpa; |
212 | atomic_long_t read_gpa; |
213 | |
214 | atomic_long_t mesq_receive; |
215 | atomic_long_t mesq_receive_none; |
216 | atomic_long_t mesq_send; |
217 | atomic_long_t mesq_send_failed; |
218 | atomic_long_t mesq_noop; |
219 | atomic_long_t mesq_send_unexpected_error; |
220 | atomic_long_t mesq_send_lb_overflow; |
221 | atomic_long_t mesq_send_qlimit_reached; |
222 | atomic_long_t mesq_send_amo_nacked; |
223 | atomic_long_t mesq_send_put_nacked; |
224 | atomic_long_t mesq_page_overflow; |
225 | atomic_long_t mesq_qf_locked; |
226 | atomic_long_t mesq_qf_noop_not_full; |
227 | atomic_long_t mesq_qf_switch_head_failed; |
228 | atomic_long_t mesq_qf_unexpected_error; |
229 | atomic_long_t mesq_noop_unexpected_error; |
230 | atomic_long_t mesq_noop_lb_overflow; |
231 | atomic_long_t mesq_noop_qlimit_reached; |
232 | atomic_long_t mesq_noop_amo_nacked; |
233 | atomic_long_t mesq_noop_put_nacked; |
	atomic_long_t mesq_noop_page_overflow;
};
237 | |
238 | enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync, |
239 | cchop_deallocate, tfhop_write_only, tfhop_write_restart, |
240 | tghop_invalidate, mcsop_last}; |
241 | |
242 | struct mcs_op_statistic { |
243 | atomic_long_t count; |
244 | atomic_long_t total; |
245 | unsigned long max; |
246 | }; |
247 | |
248 | extern struct mcs_op_statistic mcs_op_statistics[mcsop_last]; |
249 | |
250 | #define OPT_DPRINT 1 |
251 | #define OPT_STATS 2 |
252 | |
253 | |
254 | #define IRQ_GRU 110 /* Starting IRQ number for interrupts */ |
255 | |
256 | /* Delay in jiffies between attempts to assign a GRU context */ |
257 | #define GRU_ASSIGN_DELAY ((HZ * 20) / 1000) |
258 | |
259 | /* |
 * If a process has its context stolen, min delay in jiffies before trying to
261 | * steal a context from another process. |
262 | */ |
263 | #define GRU_STEAL_DELAY ((HZ * 200) / 1000) |
264 | |
265 | #define STAT(id) do { \ |
266 | if (gru_options & OPT_STATS) \ |
267 | atomic_long_inc(&gru_stats.id); \ |
268 | } while (0) |
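
/*
 * Illustrative use (a sketch, not a real driver path): any member of
 * struct gru_stats_s can be counted by name, e.g.
 *
 *	STAT(intr);
 *	STAT(tlb_dropin);
 *
 * The increment is compiled in but skipped at runtime unless OPT_STATS
 * is set in gru_options.
 */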
269 | |
270 | #ifdef CONFIG_SGI_GRU_DEBUG |
271 | #define gru_dbg(dev, fmt, x...) \ |
272 | do { \ |
273 | if (gru_options & OPT_DPRINT) \ |
274 | printk(KERN_DEBUG "GRU:%d %s: " fmt, smp_processor_id(), __func__, x);\ |
275 | } while (0) |
276 | #else |
277 | #define gru_dbg(x...) |
278 | #endif |
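
/*
 * Illustrative call (a sketch; "ctxnum" is a hypothetical local): output
 * appears only when the driver is built with CONFIG_SGI_GRU_DEBUG and
 * OPT_DPRINT is set in gru_options.
 *
 *	gru_dbg(grudev, "gid %d, ctxnum %d\n", gru->gs_gid, ctxnum);
 */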
279 | |
280 | /*----------------------------------------------------------------------------- |
281 | * ASID management |
282 | */ |
283 | #define MAX_ASID 0xfffff0 |
284 | #define MIN_ASID 8 |
285 | #define ASID_INC 8 /* number of regions */ |
286 | |
287 | /* Generate a GRU asid value from a GRU base asid & a virtual address. */ |
288 | #define VADDR_HI_BIT 64 |
289 | #define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3) |
290 | #define GRUASID(asid, addr) ((asid) + GRUREGION(addr)) |
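
/*
 * Worked example (illustrative): with VADDR_HI_BIT == 64, GRUREGION()
 * reduces to ((addr) >> 61) & 3, i.e. bits 62:61 of the user virtual
 * address select one of 4 regions. A base asid of 0x100 and an address
 * with bits 62:61 == 2 give GRUASID() == 0x102; since ASID_INC == 8,
 * consecutive base ASIDs cannot collide with region offsets.
 */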
291 | |
292 | /*------------------------------------------------------------------------------ |
293 | * File & VMS Tables |
294 | */ |
295 | |
296 | struct gru_state; |
297 | |
298 | /* |
 * This structure is pointed to from the mm_struct via the notifier pointer.
300 | * There is one of these per address space. |
301 | */ |
302 | struct gru_mm_tracker { /* pack to reduce size */ |
303 | unsigned int mt_asid_gen:24; /* ASID wrap count */ |
304 | unsigned int mt_asid:24; /* current base ASID for gru */ |
305 | unsigned short mt_ctxbitmap:16;/* bitmap of contexts using |
306 | asid */ |
307 | } __attribute__ ((packed)); |
308 | |
309 | struct gru_mm_struct { |
310 | struct mmu_notifier ms_notifier; |
311 | spinlock_t ms_asid_lock; /* protects ASID assignment */ |
312 | atomic_t ms_range_active;/* num range_invals active */ |
313 | wait_queue_head_t ms_wait_queue; |
314 | DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS); |
315 | struct gru_mm_tracker ms_asids[GRU_MAX_GRUS]; |
316 | }; |
317 | |
318 | /* |
319 | * One of these structures is allocated when a GSEG is mmaped. The |
320 | * structure is pointed to by the vma->vm_private_data field in the vma struct. |
321 | */ |
322 | struct gru_vma_data { |
323 | spinlock_t vd_lock; /* Serialize access to vma */ |
324 | struct list_head vd_head; /* head of linked list of gts */ |
325 | long vd_user_options;/* misc user option flags */ |
326 | int vd_cbr_au_count; |
327 | int vd_dsr_au_count; |
328 | unsigned char vd_tlb_preload_count; |
329 | }; |
330 | |
331 | /* |
 * One of these is allocated for each thread accessing a mmaped GRU. A linked
 * list of these structures is hung off the struct gru_vma_data in the
 * vm_area_struct.
334 | */ |
335 | struct gru_thread_state { |
336 | struct list_head ts_next; /* list - head at vma-private */ |
337 | struct mutex ts_ctxlock; /* load/unload CTX lock */ |
338 | struct mm_struct *ts_mm; /* mm currently mapped to |
339 | context */ |
340 | struct vm_area_struct *ts_vma; /* vma of GRU context */ |
341 | struct gru_state *ts_gru; /* GRU where the context is |
342 | loaded */ |
343 | struct gru_mm_struct *ts_gms; /* asid & ioproc struct */ |
344 | unsigned char ts_tlb_preload_count; /* TLB preload pages */ |
345 | unsigned long ts_cbr_map; /* map of allocated CBRs */ |
346 | unsigned long ts_dsr_map; /* map of allocated DATA |
347 | resources */ |
348 | unsigned long ts_steal_jiffies;/* jiffies when context last |
349 | stolen */ |
350 | long ts_user_options;/* misc user option flags */ |
351 | pid_t ts_tgid_owner; /* task that is using the |
352 | context - for migration */ |
353 | short ts_user_blade_id;/* user selected blade */ |
354 | signed char ts_user_chiplet_id;/* user selected chiplet */ |
355 | unsigned short ts_sizeavail; /* Pagesizes in use */ |
356 | int ts_tsid; /* thread that owns the |
357 | structure */ |
358 | int ts_tlb_int_select;/* target cpu if interrupts |
359 | enabled */ |
360 | int ts_ctxnum; /* context number where the |
361 | context is loaded */ |
362 | refcount_t ts_refcnt; /* reference count GTS */ |
	unsigned char		ts_dsr_au_count;/* Number of DSR resources
						   required for context */
	unsigned char		ts_cbr_au_count;/* Number of CBR resources
						   required for context */
367 | signed char ts_cch_req_slice;/* CCH packet slice */ |
368 | signed char ts_blade; /* If >= 0, migrate context if |
369 | ref from different blade */ |
370 | signed char ts_force_cch_reload; |
371 | signed char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each |
372 | allocated CB */ |
373 | int ts_data_valid; /* Indicates if ts_gdata has |
374 | valid data */ |
375 | struct gru_gseg_statistics ustats; /* User statistics */ |
376 | unsigned long ts_gdata[]; /* save area for GRU data (CB, |
377 | DS, CBE) */ |
378 | }; |
379 | |
380 | /* |
381 | * Threaded programs actually allocate an array of GSEGs when a context is |
382 | * created. Each thread uses a separate GSEG. TSID is the index into the GSEG |
383 | * array. |
384 | */ |
385 | #define TSID(a, v) (((a) - (v)->vm_start) / GRU_GSEG_PAGESIZE) |
386 | #define UGRUADDR(gts) ((gts)->ts_vma->vm_start + \ |
387 | (gts)->ts_tsid * GRU_GSEG_PAGESIZE) |
388 | |
389 | #define NULLCTX (-1) /* if context not loaded into GRU */ |
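
/*
 * Worked example (illustrative): for a reference at address
 * a == vma->vm_start + 2 * GRU_GSEG_PAGESIZE, TSID(a, vma) == 2, and
 * UGRUADDR() applied to the corresponding gts recovers the base user
 * address of that same GSEG.
 */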
390 | |
391 | /*----------------------------------------------------------------------------- |
392 | * GRU State Tables |
393 | */ |
394 | |
395 | /* |
396 | * One of these exists for each GRU chiplet. |
397 | */ |
398 | struct gru_state { |
399 | struct gru_blade_state *gs_blade; /* GRU state for entire |
400 | blade */ |
401 | unsigned long gs_gru_base_paddr; /* Physical address of |
402 | gru segments (64) */ |
403 | void *gs_gru_base_vaddr; /* Virtual address of |
404 | gru segments (64) */ |
405 | unsigned short gs_gid; /* unique GRU number */ |
406 | unsigned short gs_blade_id; /* blade of GRU */ |
407 | unsigned char gs_chiplet_id; /* blade chiplet of GRU */ |
408 | unsigned char gs_tgh_local_shift; /* used to pick TGH for |
409 | local flush */ |
410 | unsigned char gs_tgh_first_remote; /* starting TGH# for |
411 | remote flush */ |
412 | spinlock_t gs_asid_lock; /* lock used for |
413 | assigning asids */ |
414 | spinlock_t gs_lock; /* lock used for |
415 | assigning contexts */ |
416 | |
417 | /* -- the following are protected by the gs_asid_lock spinlock ---- */ |
	unsigned int		gs_asid;		/* Next available ASID */
419 | unsigned int gs_asid_limit; /* Limit of available |
420 | ASIDs */ |
421 | unsigned int gs_asid_gen; /* asid generation. |
422 | Inc on wrap */ |
423 | |
424 | /* --- the following fields are protected by the gs_lock spinlock --- */ |
425 | unsigned long gs_context_map; /* bitmap to manage |
426 | contexts in use */ |
427 | unsigned long gs_cbr_map; /* bitmap to manage CB |
428 | resources */ |
429 | unsigned long gs_dsr_map; /* bitmap used to manage |
430 | DATA resources */ |
431 | unsigned int gs_reserved_cbrs; /* Number of kernel- |
432 | reserved cbrs */ |
433 | unsigned int gs_reserved_dsr_bytes; /* Bytes of kernel- |
434 | reserved dsrs */ |
435 | unsigned short gs_active_contexts; /* number of contexts |
436 | in use */ |
437 | struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using |
438 | the context */ |
439 | int gs_irq[GRU_NUM_TFM]; /* Interrupt irqs */ |
440 | }; |
441 | |
442 | /* |
443 | * This structure contains the GRU state for all the GRUs on a blade. |
444 | */ |
445 | struct gru_blade_state { |
446 | void *kernel_cb; /* First kernel |
447 | reserved cb */ |
448 | void *kernel_dsr; /* First kernel |
449 | reserved DSR */ |
450 | struct rw_semaphore bs_kgts_sema; /* lock for kgts */ |
451 | struct gru_thread_state *bs_kgts; /* GTS for kernel use */ |
452 | |
453 | /* ---- the following are used for managing kernel async GRU CBRs --- */ |
454 | int bs_async_dsr_bytes; /* DSRs for async */ |
455 | int bs_async_cbrs; /* CBRs AU for async */ |
456 | struct completion *bs_async_wq; |
457 | |
458 | /* ---- the following are protected by the bs_lock spinlock ---- */ |
459 | spinlock_t bs_lock; /* lock used for |
460 | stealing contexts */ |
461 | int bs_lru_ctxnum; /* STEAL - last context |
462 | stolen */ |
463 | struct gru_state *bs_lru_gru; /* STEAL - last gru |
464 | stolen */ |
465 | |
466 | struct gru_state bs_grus[GRU_CHIPLETS_PER_BLADE]; |
467 | }; |
468 | |
469 | /*----------------------------------------------------------------------------- |
470 | * Address Primitives |
471 | */ |
472 | #define get_tfm_for_cpu(g, c) \ |
473 | ((struct gru_tlb_fault_map *)get_tfm((g)->gs_gru_base_vaddr, (c))) |
474 | #define get_tfh_by_index(g, i) \ |
475 | ((struct gru_tlb_fault_handle *)get_tfh((g)->gs_gru_base_vaddr, (i))) |
476 | #define get_tgh_by_index(g, i) \ |
477 | ((struct gru_tlb_global_handle *)get_tgh((g)->gs_gru_base_vaddr, (i))) |
478 | #define get_cbe_by_index(g, i) \ |
479 | ((struct gru_control_block_extended *)get_cbe((g)->gs_gru_base_vaddr,\ |
480 | (i))) |
481 | |
482 | /*----------------------------------------------------------------------------- |
483 | * Useful Macros |
484 | */ |
485 | |
486 | /* Given a blade# & chiplet#, get a pointer to the GRU */ |
487 | #define get_gru(b, c) (&gru_base[b]->bs_grus[c]) |
488 | |
489 | /* Number of bytes to save/restore when unloading/loading GRU contexts */ |
490 | #define DSR_BYTES(dsr) ((dsr) * GRU_DSR_AU_BYTES) |
491 | #define CBR_BYTES(cbr) ((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2) |
492 | |
493 | /* Convert a user CB number to the actual CBRNUM */ |
494 | #define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \ |
495 | * GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE) |
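
/*
 * Worked example (illustrative, assuming GRU_CBR_AU_SIZE == 2): user CB
 * number 5 lies in allocation unit 5 / 2 == 2; if ts_cbr_idx[2] == 7,
 * the actual CBRNUM is 7 * 2 + 5 % 2 == 15.
 */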
496 | |
497 | /* Convert a gid to a pointer to the GRU */ |
498 | #define GID_TO_GRU(gid) \ |
499 | (gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ? \ |
500 | (&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]-> \ |
501 | bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) : \ |
502 | NULL) |
503 | |
504 | /* Scan all active GRUs in a GRU bitmap */ |
505 | #define for_each_gru_in_bitmap(gid, map) \ |
506 | for_each_set_bit((gid), (map), GRU_MAX_GRUS) |
507 | |
508 | /* Scan all active GRUs on a specific blade */ |
509 | #define for_each_gru_on_blade(gru, nid, i) \ |
510 | for ((gru) = gru_base[nid]->bs_grus, (i) = 0; \ |
511 | (i) < GRU_CHIPLETS_PER_BLADE; \ |
512 | (i)++, (gru)++) |
513 | |
514 | /* Scan all GRUs */ |
515 | #define foreach_gid(gid) \ |
516 | for ((gid) = 0; (gid) < gru_max_gids; (gid)++) |
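
/*
 * Illustrative use of the scan macros (a sketch, not a real driver path):
 * flush the TLB of every GRU present in the system.
 *
 *	int gid;
 *
 *	foreach_gid(gid) {
 *		struct gru_state *gru = GID_TO_GRU(gid);
 *
 *		if (gru)
 *			gru_flush_all_tlb(gru);
 *	}
 */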
517 | |
/* Scan all active GTSs on a gru. Note: must hold gs_lock to use this macro. */
519 | #define for_each_gts_on_gru(gts, gru, ctxnum) \ |
520 | for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++) \ |
521 | if (((gts) = (gru)->gs_gts[ctxnum])) |
522 | |
523 | /* Scan each CBR whose bit is set in a TFM (or copy of) */ |
524 | #define for_each_cbr_in_tfm(i, map) \ |
525 | for_each_set_bit((i), (map), GRU_NUM_CBE) |
526 | |
527 | /* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */ |
528 | #define for_each_cbr_in_allocation_map(i, map, k) \ |
529 | for_each_set_bit((k), (map), GRU_CBR_AU) \ |
530 | for ((i) = (k)*GRU_CBR_AU_SIZE; \ |
531 | (i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++) |
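
/*
 * Worked example (illustrative, assuming GRU_CBR_AU_SIZE == 2): with
 * bits 0 and 3 set in the allocation map, the nested loops visit CBRs
 * 0, 1, 6 and 7.
 */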
532 | |
533 | #define gseg_physical_address(gru, ctxnum) \ |
534 | ((gru)->gs_gru_base_paddr + ctxnum * GRU_GSEG_STRIDE) |
535 | #define gseg_virtual_address(gru, ctxnum) \ |
536 | ((gru)->gs_gru_base_vaddr + ctxnum * GRU_GSEG_STRIDE) |
537 | |
538 | /*----------------------------------------------------------------------------- |
539 | * Lock / Unlock GRU handles |
540 | * Use the "delresp" bit in the handle as a "lock" bit. |
541 | */ |
542 | |
543 | /* Lock hierarchy checking enabled only in emulator */ |
544 | |
545 | /* 0 = lock failed, 1 = locked */ |
546 | static inline int __trylock_handle(void *h) |
547 | { |
	return !test_and_set_bit(1, h);
549 | } |
550 | |
551 | static inline void __lock_handle(void *h) |
552 | { |
	while (test_and_set_bit(1, h))
554 | cpu_relax(); |
555 | } |
556 | |
557 | static inline void __unlock_handle(void *h) |
558 | { |
	clear_bit(1, h);
560 | } |
561 | |
562 | static inline int trylock_cch_handle(struct gru_context_configuration_handle *cch) |
563 | { |
	return __trylock_handle(cch);
565 | } |
566 | |
567 | static inline void lock_cch_handle(struct gru_context_configuration_handle *cch) |
568 | { |
	__lock_handle(cch);
570 | } |
571 | |
572 | static inline void unlock_cch_handle(struct gru_context_configuration_handle |
573 | *cch) |
574 | { |
	__unlock_handle(cch);
576 | } |
577 | |
578 | static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh) |
579 | { |
	__lock_handle(tgh);
581 | } |
582 | |
583 | static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh) |
584 | { |
	__unlock_handle(tgh);
586 | } |
587 | |
588 | static inline int is_kernel_context(struct gru_thread_state *gts) |
589 | { |
590 | return !gts->ts_mm; |
591 | } |
592 | |
593 | /* |
 * The following are for Nehalem-EX. A more general scheme is needed for
595 | * future processors. |
596 | */ |
597 | #define UV_MAX_INT_CORES 8 |
598 | #define uv_cpu_socket_number(p) ((cpu_physical_id(p) >> 5) & 1) |
599 | #define uv_cpu_ht_number(p) (cpu_physical_id(p) & 1) |
600 | #define uv_cpu_core_number(p) (((cpu_physical_id(p) >> 2) & 4) | \ |
601 | ((cpu_physical_id(p) >> 1) & 3)) |
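
/*
 * Worked decode (illustrative): the socket number is physical-id bit 5,
 * the HT number bit 0, and the core number bits {4, 2, 1}. For example,
 * cpu_physical_id == 0x36 (0b110110) decodes to socket 1, core 7, ht 0.
 */
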
602 | /*----------------------------------------------------------------------------- |
603 | * Function prototypes & externs |
604 | */ |
605 | struct gru_unload_context_req; |
606 | |
607 | extern const struct vm_operations_struct gru_vm_ops; |
608 | extern struct device *grudev; |
609 | |
610 | extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, |
611 | int tsid); |
612 | extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct |
613 | *vma, int tsid); |
614 | extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct |
615 | *vma, int tsid); |
616 | extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts); |
617 | extern void gru_load_context(struct gru_thread_state *gts); |
618 | extern void gru_steal_context(struct gru_thread_state *gts); |
619 | extern void gru_unload_context(struct gru_thread_state *gts, int savestate); |
620 | extern int gru_update_cch(struct gru_thread_state *gts); |
621 | extern void gts_drop(struct gru_thread_state *gts); |
622 | extern void gru_tgh_flush_init(struct gru_state *gru); |
623 | extern int gru_kservices_init(void); |
624 | extern void gru_kservices_exit(void); |
625 | extern irqreturn_t gru0_intr(int irq, void *dev_id); |
626 | extern irqreturn_t gru1_intr(int irq, void *dev_id); |
627 | extern irqreturn_t gru_intr_mblade(int irq, void *dev_id); |
628 | extern int gru_dump_chiplet_request(unsigned long arg); |
629 | extern long gru_get_gseg_statistics(unsigned long arg); |
630 | extern int gru_handle_user_call_os(unsigned long address); |
631 | extern int gru_user_flush_tlb(unsigned long arg); |
632 | extern int gru_user_unload_context(unsigned long arg); |
633 | extern int gru_get_exception_detail(unsigned long arg); |
634 | extern int gru_set_context_option(unsigned long address); |
635 | extern int gru_check_context_placement(struct gru_thread_state *gts); |
636 | extern int gru_cpu_fault_map_id(void); |
637 | extern struct vm_area_struct *gru_find_vma(unsigned long vaddr); |
638 | extern void gru_flush_all_tlb(struct gru_state *gru); |
639 | extern int gru_proc_init(void); |
640 | extern void gru_proc_exit(void); |
641 | |
642 | extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, |
643 | int cbr_au_count, int dsr_au_count, |
644 | unsigned char tlb_preload_count, int options, int tsid); |
645 | extern unsigned long gru_reserve_cb_resources(struct gru_state *gru, |
646 | int cbr_au_count, signed char *cbmap); |
647 | extern unsigned long gru_reserve_ds_resources(struct gru_state *gru, |
648 | int dsr_au_count, signed char *dsmap); |
649 | extern vm_fault_t gru_fault(struct vm_fault *vmf); |
650 | extern struct gru_mm_struct *gru_register_mmu_notifier(void); |
651 | extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms); |
652 | |
653 | extern int gru_ktest(unsigned long arg); |
654 | extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, |
655 | unsigned long len); |
656 | |
657 | extern unsigned long gru_options; |
658 | |
659 | #endif /* __GRUTABLES_H__ */ |
660 | |