// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright(c) 2015 - 2020 Intel Corporation.
 */

#include <linux/topology.h>
#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/numa.h>

#include "hfi.h"
#include "affinity.h"
#include "sdma.h"
#include "trace.h"

struct hfi1_affinity_node_list node_affinity = {
	.list = LIST_HEAD_INIT(node_affinity.list),
	.lock = __MUTEX_INITIALIZER(node_affinity.lock)
};

/* Name of IRQ types, indexed by enum irq_type */
static const char * const irq_type_names[] = {
	"SDMA",
	"RCVCTXT",
	"NETDEVCTXT",
	"GENERAL",
	"OTHER",
};

/* Per NUMA node count of HFI devices */
static unsigned int *hfi1_per_node_cntr;

static inline void init_cpu_mask_set(struct cpu_mask_set *set)
{
	cpumask_clear(&set->mask);
	cpumask_clear(&set->used);
	set->gen = 0;
}

/* Increment generation of CPU set if needed */
static void _cpu_mask_set_gen_inc(struct cpu_mask_set *set)
{
	if (cpumask_equal(&set->mask, &set->used)) {
		/*
		 * We've used up all the CPUs, bump up the generation
		 * and reset the 'used' map
		 */
		set->gen++;
		cpumask_clear(&set->used);
	}
}

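/* Decrement generation of CPU set once all of its CPUs have been released */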
static void _cpu_mask_set_gen_dec(struct cpu_mask_set *set)
{
	if (cpumask_empty(&set->used) && set->gen) {
		set->gen--;
		cpumask_copy(&set->used, &set->mask);
	}
}

/* Get the first CPU from the list of unused CPUs in a CPU set data structure */
static int cpu_mask_set_get_first(struct cpu_mask_set *set, cpumask_var_t diff)
{
	int cpu;

	if (!diff || !set)
		return -EINVAL;

	_cpu_mask_set_gen_inc(set);

	/* Find out CPUs left in CPU mask */
	cpumask_andnot(diff, &set->mask, &set->used);

	cpu = cpumask_first(diff);
	if (cpu >= nr_cpu_ids) /* empty */
		cpu = -EINVAL;
	else
		cpumask_set_cpu(cpu, &set->used);

	return cpu;
}

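/* Release a CPU back to the set of unused CPUs */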
static void cpu_mask_set_put(struct cpu_mask_set *set, int cpu)
{
	if (!set)
		return;

	cpumask_clear_cpu(cpu, &set->used);
	_cpu_mask_set_gen_dec(set);
}

/* Initialize non-HT cpu cores mask */
void init_real_cpu_mask(void)
{
	int possible, curr_cpu, i, ht;

	cpumask_clear(&node_affinity.real_cpu_mask);

	/* Start with cpu online mask as the real cpu mask */
	cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask);

	/*
	 * Remove HT cores from the real cpu mask. Do this in two steps below.
	 */
	possible = cpumask_weight(&node_affinity.real_cpu_mask);
	ht = cpumask_weight(topology_sibling_cpumask(
				cpumask_first(&node_affinity.real_cpu_mask)));
	/*
	 * Step 1. Skip over the first N HT siblings and use them as the
	 * "real" cores. Assumes that HT cores are not enumerated in
	 * succession (except in the single core case).
	 */
	curr_cpu = cpumask_first(&node_affinity.real_cpu_mask);
	for (i = 0; i < possible / ht; i++)
		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
	/*
	 * Step 2. Remove the remaining HT siblings. Use cpumask_next() to
	 * skip any gaps.
	 */
	for (; i < possible; i++) {
		cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask);
		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
	}
}

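/*
 * Initialize the global affinity bookkeeping and count the HFI devices
 * present on each NUMA node so per-node CPU resources can be divided
 * among them.
 */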
int node_affinity_init(void)
{
	int node;
	struct pci_dev *dev = NULL;
	const struct pci_device_id *ids = hfi1_pci_tbl;

	cpumask_clear(&node_affinity.proc.used);
	cpumask_copy(&node_affinity.proc.mask, cpu_online_mask);

	node_affinity.proc.gen = 0;
	node_affinity.num_core_siblings =
		cpumask_weight(topology_sibling_cpumask(
			cpumask_first(&node_affinity.proc.mask)
			));
	node_affinity.num_possible_nodes = num_possible_nodes();
	node_affinity.num_online_nodes = num_online_nodes();
	node_affinity.num_online_cpus = num_online_cpus();

	/*
	 * The real cpu mask is part of the affinity struct but it has to be
	 * initialized early. It is needed to calculate the number of user
	 * contexts in set_up_context_variables().
	 */
	init_real_cpu_mask();

	hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes,
				     sizeof(*hfi1_per_node_cntr), GFP_KERNEL);
	if (!hfi1_per_node_cntr)
		return -ENOMEM;

	while (ids->vendor) {
		dev = NULL;
		while ((dev = pci_get_device(ids->vendor, ids->device, dev))) {
			node = pcibus_to_node(dev->bus);
			if (node < 0)
				goto out;

			hfi1_per_node_cntr[node]++;
		}
		ids++;
	}

	return 0;

out:
	/*
	 * Invalid PCI NUMA node information found, note it, and populate
	 * our database 1:1.
	 */
	pr_err("HFI: Invalid PCI NUMA node. Performance may be affected\n");
	pr_err("HFI: System BIOS may need to be upgraded\n");
	for (node = 0; node < node_affinity.num_possible_nodes; node++)
		hfi1_per_node_cntr[node] = 1;

	pci_dev_put(dev);

	return 0;
}

static void node_affinity_destroy(struct hfi1_affinity_node *entry)
{
	free_percpu(entry->comp_vect_affinity);
	kfree(entry);
}

void node_affinity_destroy_all(void)
{
	struct list_head *pos, *q;
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	list_for_each_safe(pos, q, &node_affinity.list) {
		entry = list_entry(pos, struct hfi1_affinity_node,
				   list);
		list_del(pos);
		node_affinity_destroy(entry);
	}
	mutex_unlock(&node_affinity.lock);
	kfree(hfi1_per_node_cntr);
}

static struct hfi1_affinity_node *node_affinity_allocate(int node)
{
	struct hfi1_affinity_node *entry;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return NULL;
	entry->node = node;
	entry->comp_vect_affinity = alloc_percpu(u16);
	INIT_LIST_HEAD(&entry->list);

	return entry;
}

/*
 * It appends an entry to the list.
 * It *must* be called with node_affinity.lock held.
 */
static void node_affinity_add_tail(struct hfi1_affinity_node *entry)
{
	list_add_tail(&entry->list, &node_affinity.list);
}

/* It must be called with node_affinity.lock held */
static struct hfi1_affinity_node *node_affinity_lookup(int node)
{
	struct hfi1_affinity_node *entry;

	list_for_each_entry(entry, &node_affinity.list, list) {
		if (entry->node == node)
			return entry;
	}

	return NULL;
}

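/*
 * Pick the CPU from possible_cpumask with the fewest completion vectors
 * already assigned to it and increment its reference count.
 */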
static int per_cpu_affinity_get(cpumask_var_t possible_cpumask,
				u16 __percpu *comp_vect_affinity)
{
	int curr_cpu;
	u16 cntr;
	u16 prev_cntr;
	int ret_cpu;

	if (!possible_cpumask) {
		ret_cpu = -EINVAL;
		goto fail;
	}

	if (!comp_vect_affinity) {
		ret_cpu = -EINVAL;
		goto fail;
	}

	ret_cpu = cpumask_first(possible_cpumask);
	if (ret_cpu >= nr_cpu_ids) {
		ret_cpu = -EINVAL;
		goto fail;
	}

	prev_cntr = *per_cpu_ptr(comp_vect_affinity, ret_cpu);
	for_each_cpu(curr_cpu, possible_cpumask) {
		cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);

		if (cntr < prev_cntr) {
			ret_cpu = curr_cpu;
			prev_cntr = cntr;
		}
	}

	*per_cpu_ptr(comp_vect_affinity, ret_cpu) += 1;

fail:
	return ret_cpu;
}

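/*
 * Pick the CPU from possible_cpumask with the most completion vectors
 * assigned to it, decrement its reference count, and return it.
 */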
static int per_cpu_affinity_put_max(cpumask_var_t possible_cpumask,
				    u16 __percpu *comp_vect_affinity)
{
	int curr_cpu;
	int max_cpu;
	u16 cntr;
	u16 prev_cntr;

	if (!possible_cpumask)
		return -EINVAL;

	if (!comp_vect_affinity)
		return -EINVAL;

	max_cpu = cpumask_first(possible_cpumask);
	if (max_cpu >= nr_cpu_ids)
		return -EINVAL;

	prev_cntr = *per_cpu_ptr(comp_vect_affinity, max_cpu);
	for_each_cpu(curr_cpu, possible_cpumask) {
		cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);

		if (cntr > prev_cntr) {
			max_cpu = curr_cpu;
			prev_cntr = cntr;
		}
	}

	*per_cpu_ptr(comp_vect_affinity, max_cpu) -= 1;

	return max_cpu;
}

/*
 * Non-interrupt CPUs are used first, then interrupt CPUs.
 * Two already allocated cpu masks must be passed.
 */
static int _dev_comp_vect_cpu_get(struct hfi1_devdata *dd,
				  struct hfi1_affinity_node *entry,
				  cpumask_var_t non_intr_cpus,
				  cpumask_var_t available_cpus)
	__must_hold(&node_affinity.lock)
{
	int cpu;
	struct cpu_mask_set *set = dd->comp_vect;

	lockdep_assert_held(&node_affinity.lock);
	if (!non_intr_cpus) {
		cpu = -1;
		goto fail;
	}

	if (!available_cpus) {
		cpu = -1;
		goto fail;
	}

	/* Available CPUs for pinning completion vectors */
	_cpu_mask_set_gen_inc(set);
	cpumask_andnot(available_cpus, &set->mask, &set->used);

	/* Available CPUs without SDMA engine interrupts */
	cpumask_andnot(non_intr_cpus, available_cpus,
		       &entry->def_intr.used);

	/* If there are non-interrupt CPUs available, use them first */
	if (!cpumask_empty(non_intr_cpus))
		cpu = cpumask_first(non_intr_cpus);
	else /* Otherwise, use interrupt CPUs */
		cpu = cpumask_first(available_cpus);

	if (cpu >= nr_cpu_ids) { /* empty */
		cpu = -1;
		goto fail;
	}
	cpumask_set_cpu(cpu, &set->used);

fail:
	return cpu;
}

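/* Return a completion vector CPU to the device's CPU set */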
static void _dev_comp_vect_cpu_put(struct hfi1_devdata *dd, int cpu)
{
	struct cpu_mask_set *set = dd->comp_vect;

	if (cpu < 0)
		return;

	cpu_mask_set_put(set, cpu);
}

/* _dev_comp_vect_mappings_destroy() is reentrant */
static void _dev_comp_vect_mappings_destroy(struct hfi1_devdata *dd)
{
	int i, cpu;

	if (!dd->comp_vect_mappings)
		return;

	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
		cpu = dd->comp_vect_mappings[i];
		_dev_comp_vect_cpu_put(dd, cpu);
		dd->comp_vect_mappings[i] = -1;
		hfi1_cdbg(AFFINITY,
			  "[%s] Release CPU %d from completion vector %d",
			  rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), cpu, i);
	}

	kfree(dd->comp_vect_mappings);
	dd->comp_vect_mappings = NULL;
}

/*
 * This function creates the table for looking up CPUs for completion vectors.
 * num_comp_vectors needs to have been initialized before calling this function.
 */
static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd,
					  struct hfi1_affinity_node *entry)
	__must_hold(&node_affinity.lock)
{
	int i, cpu, ret;
	cpumask_var_t non_intr_cpus;
	cpumask_var_t available_cpus;

	lockdep_assert_held(&node_affinity.lock);

	if (!zalloc_cpumask_var(&non_intr_cpus, GFP_KERNEL))
		return -ENOMEM;

	if (!zalloc_cpumask_var(&available_cpus, GFP_KERNEL)) {
		free_cpumask_var(non_intr_cpus);
		return -ENOMEM;
	}

	dd->comp_vect_mappings = kcalloc(dd->comp_vect_possible_cpus,
					 sizeof(*dd->comp_vect_mappings),
					 GFP_KERNEL);
	if (!dd->comp_vect_mappings) {
		ret = -ENOMEM;
		goto fail;
	}
	for (i = 0; i < dd->comp_vect_possible_cpus; i++)
		dd->comp_vect_mappings[i] = -1;

	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
		cpu = _dev_comp_vect_cpu_get(dd, entry, non_intr_cpus,
					     available_cpus);
		if (cpu < 0) {
			ret = -EINVAL;
			goto fail;
		}

		dd->comp_vect_mappings[i] = cpu;
		hfi1_cdbg(AFFINITY,
			  "[%s] Completion Vector %d -> CPU %d",
			  rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu);
	}

	free_cpumask_var(available_cpus);
	free_cpumask_var(non_intr_cpus);
	return 0;

fail:
	free_cpumask_var(available_cpus);
	free_cpumask_var(non_intr_cpus);
	_dev_comp_vect_mappings_destroy(dd);

	return ret;
}

int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd)
{
	int ret;
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	if (!entry) {
		ret = -EINVAL;
		goto unlock;
	}
	ret = _dev_comp_vect_mappings_create(dd, entry);
unlock:
	mutex_unlock(&node_affinity.lock);

	return ret;
}

void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd)
{
	_dev_comp_vect_mappings_destroy(dd);
}

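/* Look up the CPU assigned to a given completion vector index */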
int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect)
{
	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);

	if (!dd->comp_vect_mappings)
		return -EINVAL;
	if (comp_vect >= dd->comp_vect_possible_cpus)
		return -EINVAL;

	return dd->comp_vect_mappings[comp_vect];
}

/*
 * It assumes dd->comp_vect_possible_cpus is available.
 */
static int _dev_comp_vect_cpu_mask_init(struct hfi1_devdata *dd,
					struct hfi1_affinity_node *entry,
					bool first_dev_init)
	__must_hold(&node_affinity.lock)
{
	int i, j, curr_cpu;
	int possible_cpus_comp_vect = 0;
	struct cpumask *dev_comp_vect_mask = &dd->comp_vect->mask;

	lockdep_assert_held(&node_affinity.lock);
	/*
	 * If there's only one CPU available for completion vectors, then
	 * there will only be one completion vector available. Otherwise,
	 * the number of completion vectors available will be the number of
	 * available CPUs divided by the number of devices in the
	 * local NUMA node.
	 */
	if (cpumask_weight(&entry->comp_vect_mask) == 1) {
		possible_cpus_comp_vect = 1;
		dd_dev_warn(dd,
			    "Number of kernel receive queues is too large for completion vector affinity to be effective\n");
	} else {
		possible_cpus_comp_vect +=
			cpumask_weight(&entry->comp_vect_mask) /
				       hfi1_per_node_cntr[dd->node];

		/*
		 * If the completion vector CPUs available don't divide
		 * evenly among devices, then the first device to be
		 * initialized gets an extra CPU.
		 */
		if (first_dev_init &&
		    cpumask_weight(&entry->comp_vect_mask) %
		    hfi1_per_node_cntr[dd->node] != 0)
			possible_cpus_comp_vect++;
	}

	dd->comp_vect_possible_cpus = possible_cpus_comp_vect;

	/* Reserving CPUs for device completion vector */
	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
		curr_cpu = per_cpu_affinity_get(&entry->comp_vect_mask,
						entry->comp_vect_affinity);
		if (curr_cpu < 0)
			goto fail;

		cpumask_set_cpu(curr_cpu, dev_comp_vect_mask);
	}

	hfi1_cdbg(AFFINITY,
		  "[%s] Completion vector affinity CPU set(s) %*pbl",
		  rvt_get_ibdev_name(&(dd)->verbs_dev.rdi),
		  cpumask_pr_args(dev_comp_vect_mask));

	return 0;

fail:
	for (j = 0; j < i; j++)
		per_cpu_affinity_put_max(&entry->comp_vect_mask,
					 entry->comp_vect_affinity);

	return curr_cpu;
}

/*
 * It assumes dd->comp_vect_possible_cpus is available.
 */
static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd,
					     struct hfi1_affinity_node *entry)
	__must_hold(&node_affinity.lock)
{
	int i, cpu;

	lockdep_assert_held(&node_affinity.lock);
	if (!dd->comp_vect_possible_cpus)
		return;

	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
		cpu = per_cpu_affinity_put_max(&dd->comp_vect->mask,
					       entry->comp_vect_affinity);
		/* Clearing CPU in device completion vector cpu mask */
		if (cpu >= 0)
			cpumask_clear_cpu(cpu, &dd->comp_vect->mask);
	}

	dd->comp_vect_possible_cpus = 0;
}

/*
 * Interrupt affinity.
 *
 * non-rcv avail gets a default mask that
 * starts as possible cpus with threads reset
 * and each rcv avail reset.
 *
 * rcv avail gets node relative 1 wrapping back
 * to the node relative 1 as necessary.
 *
 */
int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
	struct hfi1_affinity_node *entry;
	const struct cpumask *local_mask;
	int curr_cpu, possible, i, ret;
	bool new_entry = false;

	local_mask = cpumask_of_node(dd->node);
	if (cpumask_first(local_mask) >= nr_cpu_ids)
		local_mask = topology_core_cpumask(0);

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);

	/*
	 * If this is the first time this NUMA node's affinity is used,
	 * create an entry in the global affinity structure and initialize it.
	 */
	if (!entry) {
		entry = node_affinity_allocate(dd->node);
		if (!entry) {
			dd_dev_err(dd,
				   "Unable to allocate global affinity node\n");
			ret = -ENOMEM;
			goto fail;
		}
		new_entry = true;

		init_cpu_mask_set(&entry->def_intr);
		init_cpu_mask_set(&entry->rcv_intr);
		cpumask_clear(&entry->comp_vect_mask);
		cpumask_clear(&entry->general_intr_mask);
		/* Use the "real" cpu mask of this node as the default */
		cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
			    local_mask);

		/* fill in the receive list */
		possible = cpumask_weight(&entry->def_intr.mask);
		curr_cpu = cpumask_first(&entry->def_intr.mask);

		if (possible == 1) {
			/* only one CPU, everyone will use it */
			cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask);
			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
		} else {
			/*
			 * The general/control context will be the first CPU in
			 * the default list, so it is removed from the default
			 * list and added to the general interrupt list.
			 */
			cpumask_clear_cpu(curr_cpu, &entry->def_intr.mask);
			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
			curr_cpu = cpumask_next(curr_cpu,
						&entry->def_intr.mask);

			/*
			 * Remove the remaining kernel receive queues from
			 * the default list and add them to the receive list.
			 */
			for (i = 0;
			     i < (dd->n_krcv_queues - 1) *
				  hfi1_per_node_cntr[dd->node];
			     i++) {
				cpumask_clear_cpu(curr_cpu,
						  &entry->def_intr.mask);
				cpumask_set_cpu(curr_cpu,
						&entry->rcv_intr.mask);
				curr_cpu = cpumask_next(curr_cpu,
							&entry->def_intr.mask);
				if (curr_cpu >= nr_cpu_ids)
					break;
			}

			/*
			 * If there ends up being 0 CPU cores leftover for SDMA
			 * engines, use the same CPU cores as general/control
			 * context.
			 */
			if (cpumask_empty(&entry->def_intr.mask))
				cpumask_copy(&entry->def_intr.mask,
					     &entry->general_intr_mask);
		}

		/* Determine completion vector CPUs for the entire node */
		cpumask_and(&entry->comp_vect_mask,
			    &node_affinity.real_cpu_mask, local_mask);
		cpumask_andnot(&entry->comp_vect_mask,
			       &entry->comp_vect_mask,
			       &entry->rcv_intr.mask);
		cpumask_andnot(&entry->comp_vect_mask,
			       &entry->comp_vect_mask,
			       &entry->general_intr_mask);

		/*
		 * If there ends up being 0 CPU cores leftover for completion
		 * vectors, use the same CPU core as the general/control
		 * context.
		 */
		if (cpumask_empty(&entry->comp_vect_mask))
			cpumask_copy(&entry->comp_vect_mask,
				     &entry->general_intr_mask);
	}

	ret = _dev_comp_vect_cpu_mask_init(dd, entry, new_entry);
	if (ret < 0)
		goto fail;

	if (new_entry)
		node_affinity_add_tail(entry);

	dd->affinity_entry = entry;
	mutex_unlock(&node_affinity.lock);

	return 0;

fail:
	if (new_entry)
		node_affinity_destroy(entry);
	mutex_unlock(&node_affinity.lock);
	return ret;
}

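/* Undo hfi1_dev_affinity_init(): release the device's completion vector CPUs */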
void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
{
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	if (!dd->affinity_entry)
		goto unlock;
	entry = node_affinity_lookup(dd->node);
	if (!entry)
		goto unlock;

	/*
	 * Free device completion vector CPUs to be used by future
	 * completion vectors
	 */
	_dev_comp_vect_cpu_mask_clean_up(dd, entry);
unlock:
	dd->affinity_entry = NULL;
	mutex_unlock(&node_affinity.lock);
}

/*
 * Function updates the irq affinity hint for msix after it has been changed
 * by the user using the /proc/irq interface. This function only accepts
 * one cpu in the mask.
 */
static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
{
	struct sdma_engine *sde = msix->arg;
	struct hfi1_devdata *dd = sde->dd;
	struct hfi1_affinity_node *entry;
	struct cpu_mask_set *set;
	int i, old_cpu;

	if (cpu > num_online_cpus() || cpu == sde->cpu)
		return;

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	if (!entry)
		goto unlock;

	old_cpu = sde->cpu;
	sde->cpu = cpu;
	cpumask_clear(&msix->mask);
	cpumask_set_cpu(cpu, &msix->mask);
	dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n",
		   msix->irq, irq_type_names[msix->type],
		   sde->this_idx, cpu);
	irq_set_affinity_hint(msix->irq, &msix->mask);

	/*
	 * Set the new cpu in the hfi1_affinity_node and clean
	 * the old cpu if it is not used by any other IRQ
	 */
	set = &entry->def_intr;
	cpumask_set_cpu(cpu, &set->mask);
	cpumask_set_cpu(cpu, &set->used);
	for (i = 0; i < dd->msix_info.max_requested; i++) {
		struct hfi1_msix_entry *other_msix;

		other_msix = &dd->msix_info.msix_entries[i];
		if (other_msix->type != IRQ_SDMA || other_msix == msix)
			continue;

		if (cpumask_test_cpu(old_cpu, &other_msix->mask))
			goto unlock;
	}
	cpumask_clear_cpu(old_cpu, &set->mask);
	cpumask_clear_cpu(old_cpu, &set->used);
unlock:
	mutex_unlock(&node_affinity.lock);
}

static void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify,
				     const cpumask_t *mask)
{
	int cpu = cpumask_first(mask);
	struct hfi1_msix_entry *msix = container_of(notify,
						    struct hfi1_msix_entry,
						    notify);

	/* Only one CPU configuration supported currently */
	hfi1_update_sdma_affinity(msix, cpu);
}

static void hfi1_irq_notifier_release(struct kref *ref)
{
	/*
	 * This is required by affinity notifier. We don't have anything to
	 * free here.
	 */
}

static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
{
	struct irq_affinity_notify *notify = &msix->notify;

	notify->irq = msix->irq;
	notify->notify = hfi1_irq_notifier_notify;
	notify->release = hfi1_irq_notifier_release;

	if (irq_set_affinity_notifier(notify->irq, notify))
		pr_err("Failed to register sdma irq affinity notifier for irq %d\n",
		       notify->irq);
}

static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix)
{
	struct irq_affinity_notify *notify = &msix->notify;

	if (irq_set_affinity_notifier(notify->irq, NULL))
		pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n",
		       notify->irq);
}

/*
 * Function sets the irq affinity for msix.
 * It *must* be called with node_affinity.lock held.
 */
static int get_irq_affinity(struct hfi1_devdata *dd,
			    struct hfi1_msix_entry *msix)
{
	cpumask_var_t diff;
	struct hfi1_affinity_node *entry;
	struct cpu_mask_set *set = NULL;
	struct sdma_engine *sde = NULL;
	struct hfi1_ctxtdata *rcd = NULL;
	char extra[64];
	int cpu = -1;

	extra[0] = '\0';
	cpumask_clear(&msix->mask);

	entry = node_affinity_lookup(dd->node);

	switch (msix->type) {
	case IRQ_SDMA:
		sde = (struct sdma_engine *)msix->arg;
		scnprintf(extra, 64, "engine %u", sde->this_idx);
		set = &entry->def_intr;
		break;
	case IRQ_GENERAL:
		cpu = cpumask_first(&entry->general_intr_mask);
		break;
	case IRQ_RCVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		if (rcd->ctxt == HFI1_CTRL_CTXT)
			cpu = cpumask_first(&entry->general_intr_mask);
		else
			set = &entry->rcv_intr;
		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
		break;
	case IRQ_NETDEVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		set = &entry->def_intr;
		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
		break;
	default:
		dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
		return -EINVAL;
	}

	/*
	 * The general and control contexts are placed on a particular
	 * CPU, which is set above. Skip accounting for it. Everything else
	 * finds its CPU here.
	 */
	if (cpu == -1 && set) {
		if (!zalloc_cpumask_var(&diff, GFP_KERNEL))
			return -ENOMEM;

		cpu = cpu_mask_set_get_first(set, diff);
		if (cpu < 0) {
			free_cpumask_var(diff);
			dd_dev_err(dd, "Failure to obtain CPU for IRQ\n");
			return cpu;
		}

		free_cpumask_var(diff);
	}

	cpumask_set_cpu(cpu, &msix->mask);
	dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n",
		    msix->irq, irq_type_names[msix->type],
		    extra, cpu);
	irq_set_affinity_hint(msix->irq, &msix->mask);

	if (msix->type == IRQ_SDMA) {
		sde->cpu = cpu;
		hfi1_setup_sdma_notifier(msix);
	}

	return 0;
}

int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
{
	int ret;

	mutex_lock(&node_affinity.lock);
	ret = get_irq_affinity(dd, msix);
	mutex_unlock(&node_affinity.lock);
	return ret;
}

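/* Release the CPUs reserved for an MSI-X vector and clear its affinity hint */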
void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
			   struct hfi1_msix_entry *msix)
{
	struct cpu_mask_set *set = NULL;
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);

	switch (msix->type) {
	case IRQ_SDMA:
		set = &entry->def_intr;
		hfi1_cleanup_sdma_notifier(msix);
		break;
	case IRQ_GENERAL:
		/* Don't do accounting for general contexts */
		break;
	case IRQ_RCVCTXT: {
		struct hfi1_ctxtdata *rcd = msix->arg;

		/* Don't do accounting for control contexts */
		if (rcd->ctxt != HFI1_CTRL_CTXT)
			set = &entry->rcv_intr;
		break;
	}
	case IRQ_NETDEVCTXT:
		set = &entry->def_intr;
		break;
	default:
		mutex_unlock(&node_affinity.lock);
		return;
	}

	if (set) {
		cpumask_andnot(&set->used, &set->used, &msix->mask);
		_cpu_mask_set_gen_dec(set);
	}

	irq_set_affinity_hint(msix->irq, NULL);
	cpumask_clear(&msix->mask);
	mutex_unlock(&node_affinity.lock);
}

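/*
 * Build a mask containing HW thread number 'hw_thread_no' of each physical
 * core in the process CPU mask: keep one CPU per core, then shift the mask
 * up to the requested sibling set.
 */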
/* This should be called with node_affinity.lock held */
static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask,
				struct hfi1_affinity_node_list *affinity)
{
	int possible, curr_cpu, i;
	uint num_cores_per_socket = node_affinity.num_online_cpus /
					affinity->num_core_siblings /
						node_affinity.num_online_nodes;

	cpumask_copy(hw_thread_mask, &affinity->proc.mask);
	if (affinity->num_core_siblings > 0) {
		/* Removing other siblings not needed for now */
		possible = cpumask_weight(hw_thread_mask);
		curr_cpu = cpumask_first(hw_thread_mask);
		for (i = 0;
		     i < num_cores_per_socket * node_affinity.num_online_nodes;
		     i++)
			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);

		for (; i < possible; i++) {
			cpumask_clear_cpu(curr_cpu, hw_thread_mask);
			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
		}

		/* Identifying correct HW threads within physical cores */
		cpumask_shift_left(hw_thread_mask, hw_thread_mask,
				   num_cores_per_socket *
				   node_affinity.num_online_nodes *
				   hw_thread_no);
	}
}

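/*
 * Recommend (and mark as used) a CPU for a user process opening a context on
 * the device attached to NUMA node 'node'; returns -1 if no CPU is selected.
 */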
int hfi1_get_proc_affinity(int node)
{
	int cpu = -1, ret, i;
	struct hfi1_affinity_node *entry;
	cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
	const struct cpumask *node_mask,
		*proc_mask = current->cpus_ptr;
	struct hfi1_affinity_node_list *affinity = &node_affinity;
	struct cpu_mask_set *set = &affinity->proc;

	/*
	 * check whether process/context affinity has already
	 * been set
	 */
	if (current->nr_cpus_allowed == 1) {
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
			  current->pid, current->comm,
			  cpumask_pr_args(proc_mask));
		/*
		 * Mark the pre-set CPU as used. This is atomic so we don't
		 * need the lock
		 */
		cpu = cpumask_first(proc_mask);
		cpumask_set_cpu(cpu, &set->used);
		goto done;
	} else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
			  current->pid, current->comm,
			  cpumask_pr_args(proc_mask));
		goto done;
	}

	/*
	 * The process does not have a preset CPU affinity so find one to
	 * recommend using the following algorithm:
	 *
	 * For each user process that is opening a context on HFI Y:
	 *  a) If all cores are filled, reinitialize the bitmask
	 *  b) Fill real cores first, then HT cores (First set of HT
	 *     cores on all physical cores, then second set of HT core,
	 *     and, so on) in the following order:
	 *
	 *     1. Same NUMA node as HFI Y and not running an IRQ
	 *        handler
	 *     2. Same NUMA node as HFI Y and running an IRQ handler
	 *     3. Different NUMA node to HFI Y and not running an IRQ
	 *        handler
	 *     4. Different NUMA node to HFI Y and running an IRQ
	 *        handler
	 *  c) Mark core as filled in the bitmask. As user processes are
	 *     done, clear cores from the bitmask.
	 */

	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
	if (!ret)
		goto done;
	ret = zalloc_cpumask_var(&hw_thread_mask, GFP_KERNEL);
	if (!ret)
		goto free_diff;
	ret = zalloc_cpumask_var(&available_mask, GFP_KERNEL);
	if (!ret)
		goto free_hw_thread_mask;
	ret = zalloc_cpumask_var(&intrs_mask, GFP_KERNEL);
	if (!ret)
		goto free_available_mask;

	mutex_lock(&affinity->lock);
	/*
	 * If we've used all available HW threads, clear the mask and start
	 * overloading.
	 */
	_cpu_mask_set_gen_inc(set);

	/*
	 * If NUMA node has CPUs used by interrupt handlers, include them in the
	 * interrupt handler mask.
	 */
	entry = node_affinity_lookup(node);
	if (entry) {
		cpumask_copy(intrs_mask, (entry->def_intr.gen ?
					  &entry->def_intr.mask :
					  &entry->def_intr.used));
		cpumask_or(intrs_mask, intrs_mask, (entry->rcv_intr.gen ?
						    &entry->rcv_intr.mask :
						    &entry->rcv_intr.used));
		cpumask_or(intrs_mask, intrs_mask, &entry->general_intr_mask);
	}
	hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
		  cpumask_pr_args(intrs_mask));

	cpumask_copy(hw_thread_mask, &set->mask);

	/*
	 * If HT cores are enabled, identify which HW threads within the
	 * physical cores should be used.
	 */
	if (affinity->num_core_siblings > 0) {
		for (i = 0; i < affinity->num_core_siblings; i++) {
			find_hw_thread_mask(i, hw_thread_mask, affinity);

			/*
			 * If there's at least one available core for this HW
			 * thread number, stop looking for a core.
			 *
			 * diff will always be not empty at least once in this
			 * loop as the used mask gets reset when
			 * (set->mask == set->used) before this loop.
			 */
			cpumask_andnot(diff, hw_thread_mask, &set->used);
			if (!cpumask_empty(diff))
				break;
		}
	}
	hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl",
		  cpumask_pr_args(hw_thread_mask));

	node_mask = cpumask_of_node(node);
	hfi1_cdbg(PROC, "Device on NUMA %u, CPUs %*pbl", node,
		  cpumask_pr_args(node_mask));

	/* Get cpumask of available CPUs on preferred NUMA */
	cpumask_and(available_mask, hw_thread_mask, node_mask);
	cpumask_andnot(available_mask, available_mask, &set->used);
	hfi1_cdbg(PROC, "Available CPUs on NUMA %u: %*pbl", node,
		  cpumask_pr_args(available_mask));

	/*
	 * At first, we don't want to place processes on the same
	 * CPUs as interrupt handlers. Then, CPUs running interrupt
	 * handlers are used.
	 *
	 * 1) If diff is not empty, then there are CPUs not running
	 *    interrupt handlers available, so diff gets copied
	 *    over to available_mask.
	 * 2) If diff is empty, then all CPUs not running interrupt
	 *    handlers are taken, so available_mask contains all
	 *    available CPUs running interrupt handlers.
	 * 3) If available_mask is empty, then all CPUs on the
	 *    preferred NUMA node are taken, so other NUMA nodes are
	 *    used for process assignments using the same method as
	 *    the preferred NUMA node.
	 */
	cpumask_andnot(diff, available_mask, intrs_mask);
	if (!cpumask_empty(diff))
		cpumask_copy(available_mask, diff);

	/* If we don't have CPUs on the preferred node, use other NUMA nodes */
	if (cpumask_empty(available_mask)) {
		cpumask_andnot(available_mask, hw_thread_mask, &set->used);
		/* Excluding preferred NUMA cores */
		cpumask_andnot(available_mask, available_mask, node_mask);
		hfi1_cdbg(PROC,
			  "Preferred NUMA node cores are taken, cores available in other NUMA nodes: %*pbl",
			  cpumask_pr_args(available_mask));

		/*
		 * At first, we don't want to place processes on the same
		 * CPUs as interrupt handlers.
		 */
		cpumask_andnot(diff, available_mask, intrs_mask);
		if (!cpumask_empty(diff))
			cpumask_copy(available_mask, diff);
	}
	hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl",
		  cpumask_pr_args(available_mask));

	cpu = cpumask_first(available_mask);
	if (cpu >= nr_cpu_ids) /* empty */
		cpu = -1;
	else
		cpumask_set_cpu(cpu, &set->used);

	mutex_unlock(&affinity->lock);
	hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);

	free_cpumask_var(intrs_mask);
free_available_mask:
	free_cpumask_var(available_mask);
free_hw_thread_mask:
	free_cpumask_var(hw_thread_mask);
free_diff:
	free_cpumask_var(diff);
done:
	return cpu;
}

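/* Return a CPU previously reserved by hfi1_get_proc_affinity() */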
void hfi1_put_proc_affinity(int cpu)
{
	struct hfi1_affinity_node_list *affinity = &node_affinity;
	struct cpu_mask_set *set = &affinity->proc;

	if (cpu < 0)
		return;

	mutex_lock(&affinity->lock);
	cpu_mask_set_put(set, cpu);
	hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
	mutex_unlock(&affinity->lock);
}