// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * SN Platform GRU Driver
 *
 * DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
 *
 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/device.h>
#include <linux/list.h>
#include <linux/err.h>
#include <linux/prefetch.h>
#include <asm/uv/uv_hub.h>
#include "gru.h"
#include "grutables.h"
#include "gruhandles.h"

unsigned long gru_options __read_mostly;

static struct device_driver gru_driver = {
	.name = "gru"
};

static struct device gru_device = {
	.init_name = "",
	.driver = &gru_driver,
};

struct device *grudev = &gru_device;

/*
 * Select a gru fault map to be used by the current cpu. Note that
 * multiple cpus may be using the same map.
 * ZZZ should be inline but did not work on emulator
 */
int gru_cpu_fault_map_id(void)
{
	int cpu = smp_processor_id();
	int id, core;

	core = uv_cpu_core_number(cpu);
	id = core + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu);
	return id;
}

/*--------- ASID Management -------------------------------------------
 *
 * Initially, assign asids sequentially from MIN_ASID .. MAX_ASID.
 * Once MAX is reached, flush the TLB & start over. However,
 * some asids may still be in use. There won't be many (percentage wise) still
 * in use. Search active contexts & determine the value of the first
 * asid in use ("x"s below). Set "limit" to this value.
 * This defines a block of assignable asids.
 *
 * When "limit" is reached, search forward from limit+1 and determine the
 * next block of assignable asids.
 *
 * Repeat until MAX_ASID is reached, then start over again.
 *
 * Each time MAX_ASID is reached, increment the asid generation. Since
 * the search for in-use asids only checks contexts with GRUs currently
 * assigned, asids in some contexts will be missed. Prior to loading
 * a context, the asid generation of the GTS asid is rechecked. If it
 * doesn't match the current generation, a new asid will be assigned.
 *
 *	0---------------x------------x---------------------x----|
 *	  ^-next        ^-limit                            ^-MAX_ASID
 *
 * All asid manipulation & context loading/unloading is protected by the
 * gs_lock.
 */

/* Hit the asid limit. Start over */
static int gru_wrap_asid(struct gru_state *gru)
{
	gru_dbg(grudev, "gid %d\n", gru->gs_gid);
	STAT(asid_wrap);
	gru->gs_asid_gen++;
	return MIN_ASID;
}

/* Find the next chunk of unused asids */
static int gru_reset_asid_limit(struct gru_state *gru, int asid)
{
	int i, gid, inuse_asid, limit;

	gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
	STAT(asid_next);
	limit = MAX_ASID;
	if (asid >= limit)
		asid = gru_wrap_asid(gru);
	gru_flush_all_tlb(gru);
	gid = gru->gs_gid;
again:
	for (i = 0; i < GRU_NUM_CCH; i++) {
		if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i]))
			continue;
		inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
		gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n",
			gru->gs_gid, gru->gs_gts[i], gru->gs_gts[i]->ts_gms,
			inuse_asid, i);
		if (inuse_asid == asid) {
			asid += ASID_INC;
			if (asid >= limit) {
				/*
				 * empty range: reset the range limit and
				 * start over
				 */
				limit = MAX_ASID;
				if (asid >= MAX_ASID)
					asid = gru_wrap_asid(gru);
				goto again;
			}
		}

		if ((inuse_asid > asid) && (inuse_asid < limit))
			limit = inuse_asid;
	}
	gru->gs_asid_limit = limit;
	gru->gs_asid = asid;
	gru_dbg(grudev, "gid %d, new asid 0x%x, new_limit 0x%x\n", gru->gs_gid,
		asid, limit);
	return asid;
}

/* Assign a new ASID to a thread context. */
static int gru_assign_asid(struct gru_state *gru)
{
	int asid;

	gru->gs_asid += ASID_INC;
	asid = gru->gs_asid;
	if (asid >= gru->gs_asid_limit)
		asid = gru_reset_asid_limit(gru, asid);

	gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
	return asid;
}

/*
 * Clear n bits in a word. Return a word indicating the bits that were cleared.
 * Optionally, build an array of chars that contain the bit numbers allocated.
 */
static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
				       signed char *idx)
{
	unsigned long bits = 0;
	int i;

	while (n--) {
		i = find_first_bit(p, mmax);
		if (i == mmax)
			BUG();
		__clear_bit(i, p);
		__set_bit(i, &bits);
		if (idx)
			*idx++ = i;
	}
	return bits;
}

unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
				       signed char *cbmap)
{
	return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
				 cbmap);
}

unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
				       signed char *dsmap)
{
	return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
				 dsmap);
}

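/*
 * Reserve the CB and DSR allocation units needed by a context and mark
 * the context active. Called with gru->gs_lock held.
 */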
static void reserve_gru_resources(struct gru_state *gru,
				  struct gru_thread_state *gts)
{
	gru->gs_active_contexts++;
	gts->ts_cbr_map =
	    gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
				     gts->ts_cbr_idx);
	gts->ts_dsr_map =
	    gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL);
}

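/*
 * Return a context's CB and DSR allocation units to the GRU's free maps.
 * Called with gru->gs_lock held.
 */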
static void free_gru_resources(struct gru_state *gru,
			       struct gru_thread_state *gts)
{
	gru->gs_active_contexts--;
	gru->gs_cbr_map |= gts->ts_cbr_map;
	gru->gs_dsr_map |= gts->ts_dsr_map;
}

/*
 * Check if a GRU has sufficient free resources to satisfy an allocation
 * request. Note: GRU locks may or may not be held when this is called. If
 * not held, recheck after acquiring the appropriate locks.
 *
 * Returns 1 if sufficient resources, 0 if not
 */
static int check_gru_resources(struct gru_state *gru, int cbr_au_count,
			       int dsr_au_count, int max_active_contexts)
{
	return hweight64(gru->gs_cbr_map) >= cbr_au_count
		&& hweight64(gru->gs_dsr_map) >= dsr_au_count
		&& gru->gs_active_contexts < max_active_contexts;
}

/*
 * TLB management requires tracking all GRU chiplets that have loaded a GSEG
 * context.
 */
static int gru_load_mm_tracker(struct gru_state *gru,
			       struct gru_thread_state *gts)
{
	struct gru_mm_struct *gms = gts->ts_gms;
	struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
	unsigned short ctxbitmap = (1 << gts->ts_ctxnum);
	int asid;

	spin_lock(&gms->ms_asid_lock);
	asid = asids->mt_asid;

	spin_lock(&gru->gs_asid_lock);
	if (asid == 0 || (asids->mt_ctxbitmap == 0 && asids->mt_asid_gen !=
			  gru->gs_asid_gen)) {
		asid = gru_assign_asid(gru);
		asids->mt_asid = asid;
		asids->mt_asid_gen = gru->gs_asid_gen;
		STAT(asid_new);
	} else {
		STAT(asid_reuse);
	}
	spin_unlock(&gru->gs_asid_lock);

	BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
	asids->mt_ctxbitmap |= ctxbitmap;
	if (!test_bit(gru->gs_gid, gms->ms_asidmap))
		__set_bit(gru->gs_gid, gms->ms_asidmap);
	spin_unlock(&gms->ms_asid_lock);

	gru_dbg(grudev,
		"gid %d, gts %p, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n",
		gru->gs_gid, gts, gms, gts->ts_ctxnum, asid,
		gms->ms_asidmap[0]);
	return asid;
}

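/*
 * Remove the context from the GRU's ASID tracking state when the GSEG is
 * unloaded from the chiplet.
 */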
static void gru_unload_mm_tracker(struct gru_state *gru,
				  struct gru_thread_state *gts)
{
	struct gru_mm_struct *gms = gts->ts_gms;
	struct gru_mm_tracker *asids;
	unsigned short ctxbitmap;

	asids = &gms->ms_asids[gru->gs_gid];
	ctxbitmap = (1 << gts->ts_ctxnum);
	spin_lock(&gms->ms_asid_lock);
	spin_lock(&gru->gs_asid_lock);
	BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
	asids->mt_ctxbitmap ^= ctxbitmap;
	gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum %d, asidmap 0x%lx\n",
		gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]);
	spin_unlock(&gru->gs_asid_lock);
	spin_unlock(&gms->ms_asid_lock);
}

/*
 * Decrement the reference count on a GTS structure. Free the structure
 * if the reference count goes to zero.
 */
void gts_drop(struct gru_thread_state *gts)
{
	if (gts && refcount_dec_and_test(&gts->ts_refcnt)) {
		if (gts->ts_gms)
			gru_drop_mmu_notifier(gts->ts_gms);
		kfree(gts);
		STAT(gts_free);
	}
}

/*
 * Locate the GTS structure for the current thread. The caller must hold
 * vdata->vd_lock.
 */
static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
					*vdata, int tsid)
{
	struct gru_thread_state *gts;

	list_for_each_entry(gts, &vdata->vd_head, ts_next)
		if (gts->ts_tsid == tsid)
			return gts;
	return NULL;
}

/*
 * Allocate a thread state structure.
 */
struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
		int cbr_au_count, int dsr_au_count,
		unsigned char tlb_preload_count, int options, int tsid)
{
	struct gru_thread_state *gts;
	struct gru_mm_struct *gms;
	int bytes;

	bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count);
	bytes += sizeof(struct gru_thread_state);
	gts = kmalloc(bytes, GFP_KERNEL);
	if (!gts)
		return ERR_PTR(-ENOMEM);

	STAT(gts_alloc);
	memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */
	refcount_set(&gts->ts_refcnt, 1);
	mutex_init(&gts->ts_ctxlock);
	gts->ts_cbr_au_count = cbr_au_count;
	gts->ts_dsr_au_count = dsr_au_count;
	gts->ts_tlb_preload_count = tlb_preload_count;
	gts->ts_user_options = options;
	gts->ts_user_blade_id = -1;
	gts->ts_user_chiplet_id = -1;
	gts->ts_tsid = tsid;
	gts->ts_ctxnum = NULLCTX;
	gts->ts_tlb_int_select = -1;
	gts->ts_cch_req_slice = -1;
	gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT);
	if (vma) {
		gts->ts_mm = current->mm;
		gts->ts_vma = vma;
		gms = gru_register_mmu_notifier();
		if (IS_ERR(gms))
			goto err;
		gts->ts_gms = gms;
	}

	gru_dbg(grudev, "alloc gts %p\n", gts);
	return gts;

err:
	gts_drop(gts);
	return ERR_CAST(gms);
}

/*
 * Allocate a vma private data structure.
 */
struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid)
{
	struct gru_vma_data *vdata = NULL;

	vdata = kmalloc(sizeof(*vdata), GFP_KERNEL);
	if (!vdata)
		return NULL;

	STAT(vdata_alloc);
	INIT_LIST_HEAD(&vdata->vd_head);
	spin_lock_init(&vdata->vd_lock);
	gru_dbg(grudev, "alloc vdata %p\n", vdata);
	return vdata;
}

/*
 * Find the thread state structure for the current thread.
 */
struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma,
					int tsid)
{
	struct gru_vma_data *vdata = vma->vm_private_data;
	struct gru_thread_state *gts;

	spin_lock(&vdata->vd_lock);
	gts = gru_find_current_gts_nolock(vdata, tsid);
	spin_unlock(&vdata->vd_lock);
	gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
	return gts;
}

/*
 * Allocate a new thread state for a GSEG. Note that a concurrent thread
 * may race to create a gts for the same tsid; if it wins, the duplicate
 * allocation is dropped.
 */
struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
					int tsid)
{
	struct gru_vma_data *vdata = vma->vm_private_data;
	struct gru_thread_state *gts, *ngts;

	gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count,
			    vdata->vd_dsr_au_count,
			    vdata->vd_tlb_preload_count,
			    vdata->vd_user_options, tsid);
	if (IS_ERR(gts))
		return gts;

	spin_lock(&vdata->vd_lock);
	ngts = gru_find_current_gts_nolock(vdata, tsid);
	if (ngts) {
		gts_drop(gts);
		gts = ngts;
		STAT(gts_double_allocate);
	} else {
		list_add(&gts->ts_next, &vdata->vd_head);
	}
	spin_unlock(&vdata->vd_lock);
	gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
	return gts;
}

/*
 * Free the GRU context assigned to the thread state.
 */
static void gru_free_gru_context(struct gru_thread_state *gts)
{
	struct gru_state *gru;

	gru = gts->ts_gru;
	gru_dbg(grudev, "gts %p, gid %d\n", gts, gru->gs_gid);

	spin_lock(&gru->gs_lock);
	gru->gs_gts[gts->ts_ctxnum] = NULL;
	free_gru_resources(gru, gts);
	BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0);
	__clear_bit(gts->ts_ctxnum, &gru->gs_context_map);
	gts->ts_ctxnum = NULLCTX;
	gts->ts_gru = NULL;
	gts->ts_blade = -1;
	spin_unlock(&gru->gs_lock);

	gts_drop(gts);
	STAT(free_context);
}

/*
 * Prefetching cachelines helps hardware performance.
 * (Strictly a performance enhancement. Not functionally required).
 */
static void prefetch_data(void *p, int num, int stride)
{
	while (num-- > 0) {
		prefetchw(p);
		p += stride;
	}
}

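/* Copy a single GRU handle; return the number of bytes copied */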
static inline long gru_copy_handle(void *d, void *s)
{
	memcpy(d, s, GRU_HANDLE_BYTES);
	return GRU_HANDLE_BYTES;
}

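/*
 * Prefetch the data segment and the CB/CBE handles of a context before
 * they are copied to or from memory.
 */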
static void gru_prefetch_context(void *gseg, void *cb, void *cbe,
				 unsigned long cbrmap, unsigned long length)
{
	int i, scr;

	prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
		      GRU_CACHE_LINE_BYTES);

	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
		prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
		prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
			      GRU_CACHE_LINE_BYTES);
		cb += GRU_HANDLE_STRIDE;
	}
}

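/*
 * Copy the CB, CBE and DSR state of a context from the memory save area
 * into the GRU. If no valid saved data exists, zero the state instead.
 */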
static void gru_load_context_data(void *save, void *grubase, int ctxnum,
				  unsigned long cbrmap, unsigned long dsrmap,
				  int data_valid)
{
	void *gseg, *cb, *cbe;
	unsigned long length;
	int i, scr;

	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
	cb = gseg + GRU_CB_BASE;
	cbe = grubase + GRU_CBE_BASE;
	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);

	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
		if (data_valid) {
			save += gru_copy_handle(cb, save);
			save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE,
						save);
		} else {
			memset(cb, 0, GRU_CACHE_LINE_BYTES);
			memset(cbe + i * GRU_HANDLE_STRIDE, 0,
			       GRU_CACHE_LINE_BYTES);
		}
		/* Flush CBE to hide race in context restart */
		mb();
		gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
		cb += GRU_HANDLE_STRIDE;
	}

	if (data_valid)
		memcpy(gseg + GRU_DS_BASE, save, length);
	else
		memset(gseg + GRU_DS_BASE, 0, length);
}

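/*
 * Save the CB, CBE and DSR state of a context from the GRU into the
 * memory save area.
 */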
static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
				    unsigned long cbrmap, unsigned long dsrmap)
{
	void *gseg, *cb, *cbe;
	unsigned long length;
	int i, scr;

	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
	cb = gseg + GRU_CB_BASE;
	cbe = grubase + GRU_CBE_BASE;
	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;

	/* CBEs may not be coherent. Flush them from cache */
	for_each_cbr_in_allocation_map(i, &cbrmap, scr)
		gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
	mb();		/* Let the CL flush complete */

	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);

	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
		save += gru_copy_handle(save, cb);
		save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
		cb += GRU_HANDLE_STRIDE;
	}
	memcpy(save, gseg + GRU_DS_BASE, length);
}

void gru_unload_context(struct gru_thread_state *gts, int savestate)
{
	struct gru_state *gru = gts->ts_gru;
	struct gru_context_configuration_handle *cch;
	int ctxnum = gts->ts_ctxnum;

	if (!is_kernel_context(gts))
		zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);

	gru_dbg(grudev, "gts %p, cbrmap 0x%lx, dsrmap 0x%lx\n",
		gts, gts->ts_cbr_map, gts->ts_dsr_map);
	lock_cch_handle(cch);
	if (cch_interrupt_sync(cch))
		BUG();

	if (!is_kernel_context(gts))
		gru_unload_mm_tracker(gru, gts);
	if (savestate) {
		gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
					ctxnum, gts->ts_cbr_map,
					gts->ts_dsr_map);
		gts->ts_data_valid = 1;
	}

	if (cch_deallocate(cch))
		BUG();
	unlock_cch_handle(cch);

	gru_free_gru_context(gts);
}

/*
 * Load a GRU context by copying it from the thread data structure in memory
 * to the GRU.
 */
void gru_load_context(struct gru_thread_state *gts)
{
	struct gru_state *gru = gts->ts_gru;
	struct gru_context_configuration_handle *cch;
	int i, err, asid, ctxnum = gts->ts_ctxnum;

	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
	lock_cch_handle(cch);
	cch->tfm_fault_bit_enable =
	    (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
	     || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
	cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
	if (cch->tlb_int_enable) {
		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
		cch->tlb_int_select = gts->ts_tlb_int_select;
	}
	if (gts->ts_cch_req_slice >= 0) {
		cch->req_slice_set_enable = 1;
		cch->req_slice = gts->ts_cch_req_slice;
	} else {
		cch->req_slice_set_enable = 0;
	}
	cch->tfm_done_bit_enable = 0;
	cch->dsr_allocation_map = gts->ts_dsr_map;
	cch->cbr_allocation_map = gts->ts_cbr_map;

	if (is_kernel_context(gts)) {
		cch->unmap_enable = 1;
		cch->tfm_done_bit_enable = 1;
		cch->cb_int_enable = 1;
		cch->tlb_int_select = 0;	/* For now, ints go to cpu 0 */
	} else {
		cch->unmap_enable = 0;
		cch->tfm_done_bit_enable = 0;
		cch->cb_int_enable = 0;
		asid = gru_load_mm_tracker(gru, gts);
		for (i = 0; i < 8; i++) {
			cch->asid[i] = asid + i;
			cch->sizeavail[i] = gts->ts_sizeavail;
		}
	}

	err = cch_allocate(cch);
	if (err) {
		gru_dbg(grudev,
			"err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
			err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
		BUG();
	}

	gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
			      gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid);

	if (cch_start(cch))
		BUG();
	unlock_cch_handle(cch);

	gru_dbg(grudev, "gid %d, gts %p, cbrmap 0x%lx, dsrmap 0x%lx, tie %d, tis %d\n",
		gts->ts_gru->gs_gid, gts, gts->ts_cbr_map, gts->ts_dsr_map,
		(gts->ts_user_options == GRU_OPT_MISS_FMM_INTR), gts->ts_tlb_int_select);
}

/*
 * Update fields in an active CCH:
 *	- retarget interrupts on local blade
 *	- update sizeavail mask
 */
int gru_update_cch(struct gru_thread_state *gts)
{
	struct gru_context_configuration_handle *cch;
	struct gru_state *gru = gts->ts_gru;
	int i, ctxnum = gts->ts_ctxnum, ret = 0;

	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);

	lock_cch_handle(cch);
	if (cch->state == CCHSTATE_ACTIVE) {
		if (gru->gs_gts[gts->ts_ctxnum] != gts)
			goto exit;
		if (cch_interrupt(cch))
			BUG();
		for (i = 0; i < 8; i++)
			cch->sizeavail[i] = gts->ts_sizeavail;
		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
		cch->tlb_int_select = gru_cpu_fault_map_id();
		cch->tfm_fault_bit_enable =
		    (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
		     || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
		if (cch_start(cch))
			BUG();
		ret = 1;
	}
exit:
	unlock_cch_handle(cch);
	return ret;
}

/*
 * Update CCH tlb interrupt select. Required when all the following is true:
 *	- task's GRU context is loaded into a GRU
 *	- task is using interrupt notification for TLB faults
 *	- task has migrated to a different cpu on the same blade where
 *	  it was previously running.
 */
static int gru_retarget_intr(struct gru_thread_state *gts)
{
	if (gts->ts_tlb_int_select < 0
	    || gts->ts_tlb_int_select == gru_cpu_fault_map_id())
		return 0;

	gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
		gru_cpu_fault_map_id());
	return gru_update_cch(gts);
}

/*
 * Check if a GRU context is allowed to use a specific chiplet. By default
 * a context is assigned to any blade-local chiplet. However, users can
 * override this.
 *	Returns 1 if assignment allowed, 0 otherwise
 */
static int gru_check_chiplet_assignment(struct gru_state *gru,
					struct gru_thread_state *gts)
{
	int blade_id;
	int chiplet_id;

	blade_id = gts->ts_user_blade_id;
	if (blade_id < 0)
		blade_id = uv_numa_blade_id();

	chiplet_id = gts->ts_user_chiplet_id;
	return gru->gs_blade_id == blade_id &&
		(chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id);
}

/*
 * Unload the gru context if it is not assigned to the correct blade or
 * chiplet. Misassignment can occur if the process migrates to a different
 * blade or if the user changes the selected blade/chiplet.
 */
int gru_check_context_placement(struct gru_thread_state *gts)
{
	struct gru_state *gru;
	int ret = 0;

	/*
	 * If the current task is the context owner, verify that the
	 * context is correctly placed. This test is skipped for non-owner
	 * references. Pthread apps use non-owner references to the CBRs.
	 */
	gru = gts->ts_gru;
	/*
	 * If gru or gts->ts_tgid_owner isn't initialized properly, return
	 * success to indicate that the caller does not need to unload the
	 * gru context. The caller is responsible for inspecting and
	 * reinitializing them if needed.
	 */
	if (!gru || gts->ts_tgid_owner != current->tgid)
		return ret;

	if (!gru_check_chiplet_assignment(gru, gts)) {
		STAT(check_context_unload);
		ret = -EINVAL;
	} else if (gru_retarget_intr(gts)) {
		STAT(check_context_retarget_intr);
	}

	return ret;
}


/*
 * Insufficient GRU resources available on the local blade. Steal a context from
 * a process. This is a hack until a _real_ resource scheduler is written....
 */
#define next_ctxnum(n)	((n) < GRU_NUM_CCH - 2 ? (n) + 1 : 0)
#define next_gru(b, g)	(((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \
				 ((g)+1) : &(b)->bs_grus[0])

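/*
 * Try to acquire exclusive ownership of a context so that it can be stolen.
 * Kernel contexts are protected by the blade's bs_kgts_sema; user contexts
 * by the per-context ts_ctxlock.
 */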
static int is_gts_stealable(struct gru_thread_state *gts,
			    struct gru_blade_state *bs)
{
	if (is_kernel_context(gts))
		return down_write_trylock(&bs->bs_kgts_sema);
	else
		return mutex_trylock(&gts->ts_ctxlock);
}

static void gts_stolen(struct gru_thread_state *gts,
		       struct gru_blade_state *bs)
{
	if (is_kernel_context(gts)) {
		up_write(&bs->bs_kgts_sema);
		STAT(steal_kernel_context);
	} else {
		mutex_unlock(&gts->ts_ctxlock);
		STAT(steal_user_context);
	}
}

void gru_steal_context(struct gru_thread_state *gts)
{
	struct gru_blade_state *blade;
	struct gru_state *gru, *gru0;
	struct gru_thread_state *ngts = NULL;
	int ctxnum, ctxnum0, flag = 0, cbr, dsr;
	int blade_id;

	blade_id = gts->ts_user_blade_id;
	if (blade_id < 0)
		blade_id = uv_numa_blade_id();
	cbr = gts->ts_cbr_au_count;
	dsr = gts->ts_dsr_au_count;

	blade = gru_base[blade_id];
	spin_lock(&blade->bs_lock);

	ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
	gru = blade->bs_lru_gru;
	if (ctxnum == 0)
		gru = next_gru(blade, gru);
	blade->bs_lru_gru = gru;
	blade->bs_lru_ctxnum = ctxnum;
	ctxnum0 = ctxnum;
	gru0 = gru;
	while (1) {
		if (gru_check_chiplet_assignment(gru, gts)) {
			if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
				break;
			spin_lock(&gru->gs_lock);
			for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
				if (flag && gru == gru0 && ctxnum == ctxnum0)
					break;
				ngts = gru->gs_gts[ctxnum];
				/*
				 * We are grabbing locks out of order, so trylock is
				 * needed. GTSs are usually not locked, so the odds of
				 * success are high. If trylock fails, try to steal a
				 * different GSEG.
				 */
				if (ngts && is_gts_stealable(ngts, blade))
					break;
				ngts = NULL;
			}
			spin_unlock(&gru->gs_lock);
			if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
				break;
		}
		if (flag && gru == gru0)
			break;
		flag = 1;
		ctxnum = 0;
		gru = next_gru(blade, gru);
	}
	spin_unlock(&blade->bs_lock);

	if (ngts) {
		gts->ustats.context_stolen++;
		ngts->ts_steal_jiffies = jiffies;
		gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1);
		gts_stolen(ngts, blade);
	} else {
		STAT(steal_context_failed);
	}
	gru_dbg(grudev,
		"stole gid %d, ctxnum %d from gts %p. Need cb %d, ds %d;"
		" avail cb %ld, ds %ld\n",
		gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
		hweight64(gru->gs_dsr_map));
}

/*
 * Assign a free context number on a GRU. Called with gru->gs_lock held.
 */
static int gru_assign_context_number(struct gru_state *gru)
{
	int ctxnum;

	ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
	__set_bit(ctxnum, &gru->gs_context_map);
	return ctxnum;
}

/*
 * Scan the GRUs on the local blade & assign a GRU context.
 */
struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
{
	struct gru_state *gru, *grux;
	int i, max_active_contexts;
	int blade_id = gts->ts_user_blade_id;

	if (blade_id < 0)
		blade_id = uv_numa_blade_id();
again:
	gru = NULL;
	max_active_contexts = GRU_NUM_CCH;
	for_each_gru_on_blade(grux, blade_id, i) {
		if (!gru_check_chiplet_assignment(grux, gts))
			continue;
		if (check_gru_resources(grux, gts->ts_cbr_au_count,
					gts->ts_dsr_au_count,
					max_active_contexts)) {
			gru = grux;
			max_active_contexts = grux->gs_active_contexts;
			if (max_active_contexts == 0)
				break;
		}
	}

	if (gru) {
		spin_lock(&gru->gs_lock);
		if (!check_gru_resources(gru, gts->ts_cbr_au_count,
					 gts->ts_dsr_au_count, GRU_NUM_CCH)) {
			spin_unlock(&gru->gs_lock);
			goto again;
		}
		reserve_gru_resources(gru, gts);
		gts->ts_gru = gru;
		gts->ts_blade = gru->gs_blade_id;
		gts->ts_ctxnum = gru_assign_context_number(gru);
		refcount_inc(&gts->ts_refcnt);
		gru->gs_gts[gts->ts_ctxnum] = gts;
		spin_unlock(&gru->gs_lock);

		STAT(assign_context);
		gru_dbg(grudev,
			"gseg %p, gts %p, gid %d, ctx %d, cbr %d, dsr %d\n",
			gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
			gts->ts_gru->gs_gid, gts->ts_ctxnum,
			gts->ts_cbr_au_count, gts->ts_dsr_au_count);
	} else {
		gru_dbg(grudev, "failed to allocate a GTS %s\n", "");
		STAT(assign_context_failed);
	}

	return gru;
}

/*
 * gru_fault
 *
 * Map the user's GRU segment
 *
 *	Note: gru segments are always mmapped on GRU_GSEG_PAGESIZE boundaries.
 */
vm_fault_t gru_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct gru_thread_state *gts;
	unsigned long paddr, vaddr;
	unsigned long expires;

	vaddr = vmf->address;
	gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
		vma, vaddr, GSEG_BASE(vaddr));
	STAT(nopfn);

	/* The following check ensures vaddr is a valid address in the VMA */
	gts = gru_find_thread_state(vma, TSID(vaddr, vma));
	if (!gts)
		return VM_FAULT_SIGBUS;

again:
	mutex_lock(&gts->ts_ctxlock);
	preempt_disable();

	if (gru_check_context_placement(gts)) {
		preempt_enable();
		mutex_unlock(&gts->ts_ctxlock);
		gru_unload_context(gts, 1);
		return VM_FAULT_NOPAGE;
	}

	if (!gts->ts_gru) {
		STAT(load_user_context);
		if (!gru_assign_gru_context(gts)) {
			preempt_enable();
			mutex_unlock(&gts->ts_ctxlock);
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(GRU_ASSIGN_DELAY);  /* true hack ZZZ */
			expires = gts->ts_steal_jiffies + GRU_STEAL_DELAY;
			if (time_before(expires, jiffies))
				gru_steal_context(gts);
			goto again;
		}
		gru_load_context(gts);
		paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
		remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1),
				paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
				vma->vm_page_prot);
	}

	preempt_enable();
	mutex_unlock(&gts->ts_ctxlock);

	return VM_FAULT_NOPAGE;
}