1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * cpu_rmap.c: CPU affinity reverse-map support |
4 | * Copyright 2011 Solarflare Communications Inc. |
5 | */ |
6 | |
7 | #include <linux/cpu_rmap.h> |
8 | #include <linux/interrupt.h> |
9 | #include <linux/export.h> |
10 | |
11 | /* |
12 | * These functions maintain a mapping from CPUs to some ordered set of |
13 | * objects with CPU affinities. This can be seen as a reverse-map of |
14 | * CPU affinity. However, we do not assume that the object affinities |
15 | * cover all CPUs in the system. For those CPUs not directly covered |
16 | * by object affinities, we attempt to find a nearest object based on |
17 | * CPU topology. |
18 | */ |
19 | |
20 | /** |
21 | * alloc_cpu_rmap - allocate CPU affinity reverse-map |
22 | * @size: Number of objects to be mapped |
23 | * @flags: Allocation flags e.g. %GFP_KERNEL |
24 | */ |
25 | struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags) |
26 | { |
27 | struct cpu_rmap *rmap; |
28 | unsigned int cpu; |
29 | size_t obj_offset; |
30 | |
31 | /* This is a silly number of objects, and we use u16 indices. */ |
32 | if (size > 0xffff) |
33 | return NULL; |
34 | |
35 | /* Offset of object pointer array from base structure */ |
36 | obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]), |
37 | sizeof(void *)); |
38 | |
39 | rmap = kzalloc(size: obj_offset + size * sizeof(rmap->obj[0]), flags); |
40 | if (!rmap) |
41 | return NULL; |
42 | |
43 | kref_init(kref: &rmap->refcount); |
44 | rmap->obj = (void **)((char *)rmap + obj_offset); |
45 | |
46 | /* Initially assign CPUs to objects on a rota, since we have |
47 | * no idea where the objects are. Use infinite distance, so |
48 | * any object with known distance is preferable. Include the |
49 | * CPUs that are not present/online, since we definitely want |
50 | * any newly-hotplugged CPUs to have some object assigned. |
51 | */ |
52 | for_each_possible_cpu(cpu) { |
53 | rmap->near[cpu].index = cpu % size; |
54 | rmap->near[cpu].dist = CPU_RMAP_DIST_INF; |
55 | } |
56 | |
57 | rmap->size = size; |
58 | return rmap; |
59 | } |
60 | EXPORT_SYMBOL(alloc_cpu_rmap); |
61 | |
62 | /** |
63 | * cpu_rmap_release - internal reclaiming helper called from kref_put |
64 | * @ref: kref to struct cpu_rmap |
65 | */ |
66 | static void cpu_rmap_release(struct kref *ref) |
67 | { |
68 | struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount); |
69 | kfree(objp: rmap); |
70 | } |
71 | |
72 | /** |
73 | * cpu_rmap_get - internal helper to get new ref on a cpu_rmap |
74 | * @rmap: reverse-map allocated with alloc_cpu_rmap() |
75 | */ |
76 | static inline void cpu_rmap_get(struct cpu_rmap *rmap) |
77 | { |
78 | kref_get(kref: &rmap->refcount); |
79 | } |
80 | |
81 | /** |
82 | * cpu_rmap_put - release ref on a cpu_rmap |
83 | * @rmap: reverse-map allocated with alloc_cpu_rmap() |
84 | */ |
85 | int cpu_rmap_put(struct cpu_rmap *rmap) |
86 | { |
87 | return kref_put(kref: &rmap->refcount, release: cpu_rmap_release); |
88 | } |
89 | EXPORT_SYMBOL(cpu_rmap_put); |
90 | |
91 | /* Reevaluate nearest object for given CPU, comparing with the given |
92 | * neighbours at the given distance. |
93 | */ |
94 | static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu, |
95 | const struct cpumask *mask, u16 dist) |
96 | { |
97 | int neigh; |
98 | |
99 | for_each_cpu(neigh, mask) { |
100 | if (rmap->near[cpu].dist > dist && |
101 | rmap->near[neigh].dist <= dist) { |
102 | rmap->near[cpu].index = rmap->near[neigh].index; |
103 | rmap->near[cpu].dist = dist; |
104 | return true; |
105 | } |
106 | } |
107 | return false; |
108 | } |
109 | |
#ifdef DEBUG
/* Dump, for every possible CPU, the object index currently considered
 * nearest and the recorded distance to it.  @prefix labels the dump.
 */
static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
	unsigned int index;
	unsigned int cpu;

	pr_info("cpu_rmap %p, %s:\n", rmap, prefix);

	for_each_possible_cpu(cpu) {
		index = rmap->near[cpu].index;
		/* cpu is unsigned, so print it with %u rather than %d */
		pr_info("cpu %u -> obj %u (distance %u)\n",
			cpu, index, rmap->near[cpu].dist);
	}
}
#else
/* Without DEBUG the dump is an empty inline stub. */
static inline void
debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
}
#endif
130 | |
131 | static int get_free_index(struct cpu_rmap *rmap) |
132 | { |
133 | int i; |
134 | |
135 | for (i = 0; i < rmap->size; i++) |
136 | if (!rmap->obj[i]) |
137 | return i; |
138 | |
139 | return -ENOSPC; |
140 | } |
141 | |
142 | /** |
143 | * cpu_rmap_add - add object to a rmap |
144 | * @rmap: CPU rmap allocated with alloc_cpu_rmap() |
145 | * @obj: Object to add to rmap |
146 | * |
147 | * Return index of object or -ENOSPC if no free entry was found |
148 | */ |
149 | int cpu_rmap_add(struct cpu_rmap *rmap, void *obj) |
150 | { |
151 | int index = get_free_index(rmap); |
152 | |
153 | if (index < 0) |
154 | return index; |
155 | |
156 | rmap->obj[index] = obj; |
157 | return index; |
158 | } |
159 | EXPORT_SYMBOL(cpu_rmap_add); |
160 | |
161 | /** |
162 | * cpu_rmap_update - update CPU rmap following a change of object affinity |
163 | * @rmap: CPU rmap to update |
164 | * @index: Index of object whose affinity changed |
165 | * @affinity: New CPU affinity of object |
166 | */ |
167 | int cpu_rmap_update(struct cpu_rmap *rmap, u16 index, |
168 | const struct cpumask *affinity) |
169 | { |
170 | cpumask_var_t update_mask; |
171 | unsigned int cpu; |
172 | |
173 | if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL))) |
174 | return -ENOMEM; |
175 | |
176 | /* Invalidate distance for all CPUs for which this used to be |
177 | * the nearest object. Mark those CPUs for update. |
178 | */ |
179 | for_each_online_cpu(cpu) { |
180 | if (rmap->near[cpu].index == index) { |
181 | rmap->near[cpu].dist = CPU_RMAP_DIST_INF; |
182 | cpumask_set_cpu(cpu, dstp: update_mask); |
183 | } |
184 | } |
185 | |
186 | debug_print_rmap(rmap, prefix: "after invalidating old distances" ); |
187 | |
188 | /* Set distance to 0 for all CPUs in the new affinity mask. |
189 | * Mark all CPUs within their NUMA nodes for update. |
190 | */ |
191 | for_each_cpu(cpu, affinity) { |
192 | rmap->near[cpu].index = index; |
193 | rmap->near[cpu].dist = 0; |
194 | cpumask_or(dstp: update_mask, src1p: update_mask, |
195 | src2p: cpumask_of_node(cpu_to_node(cpu))); |
196 | } |
197 | |
198 | debug_print_rmap(rmap, prefix: "after updating neighbours" ); |
199 | |
200 | /* Update distances based on topology */ |
201 | for_each_cpu(cpu, update_mask) { |
202 | if (cpu_rmap_copy_neigh(rmap, cpu, |
203 | topology_sibling_cpumask(cpu), dist: 1)) |
204 | continue; |
205 | if (cpu_rmap_copy_neigh(rmap, cpu, |
206 | topology_core_cpumask(cpu), dist: 2)) |
207 | continue; |
208 | if (cpu_rmap_copy_neigh(rmap, cpu, |
209 | mask: cpumask_of_node(cpu_to_node(cpu)), dist: 3)) |
210 | continue; |
211 | /* We could continue into NUMA node distances, but for now |
212 | * we give up. |
213 | */ |
214 | } |
215 | |
216 | debug_print_rmap(rmap, prefix: "after copying neighbours" ); |
217 | |
218 | free_cpumask_var(mask: update_mask); |
219 | return 0; |
220 | } |
221 | EXPORT_SYMBOL(cpu_rmap_update); |
222 | |
223 | /* Glue between IRQ affinity notifiers and CPU rmaps */ |
224 | |
225 | struct irq_glue { |
226 | struct irq_affinity_notify notify; |
227 | struct cpu_rmap *rmap; |
228 | u16 index; |
229 | }; |
230 | |
231 | /** |
232 | * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs |
233 | * @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL |
234 | * |
235 | * Must be called in process context, before freeing the IRQs. |
236 | */ |
237 | void free_irq_cpu_rmap(struct cpu_rmap *rmap) |
238 | { |
239 | struct irq_glue *glue; |
240 | u16 index; |
241 | |
242 | if (!rmap) |
243 | return; |
244 | |
245 | for (index = 0; index < rmap->size; index++) { |
246 | glue = rmap->obj[index]; |
247 | if (glue) |
248 | irq_set_affinity_notifier(irq: glue->notify.irq, NULL); |
249 | } |
250 | |
251 | cpu_rmap_put(rmap); |
252 | } |
253 | EXPORT_SYMBOL(free_irq_cpu_rmap); |
254 | |
255 | /** |
256 | * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated |
257 | * @notify: struct irq_affinity_notify passed by irq/manage.c |
258 | * @mask: cpu mask for new SMP affinity |
259 | * |
260 | * This is executed in workqueue context. |
261 | */ |
262 | static void |
263 | irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask) |
264 | { |
265 | struct irq_glue *glue = |
266 | container_of(notify, struct irq_glue, notify); |
267 | int rc; |
268 | |
269 | rc = cpu_rmap_update(glue->rmap, glue->index, mask); |
270 | if (rc) |
271 | pr_warn("irq_cpu_rmap_notify: update failed: %d\n" , rc); |
272 | } |
273 | |
274 | /** |
275 | * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem |
276 | * @ref: kref to struct irq_affinity_notify passed by irq/manage.c |
277 | */ |
278 | static void irq_cpu_rmap_release(struct kref *ref) |
279 | { |
280 | struct irq_glue *glue = |
281 | container_of(ref, struct irq_glue, notify.kref); |
282 | |
283 | glue->rmap->obj[glue->index] = NULL; |
284 | cpu_rmap_put(glue->rmap); |
285 | kfree(objp: glue); |
286 | } |
287 | |
288 | /** |
289 | * irq_cpu_rmap_remove - remove an IRQ from a CPU affinity reverse-map |
290 | * @rmap: The reverse-map |
291 | * @irq: The IRQ number |
292 | */ |
293 | int irq_cpu_rmap_remove(struct cpu_rmap *rmap, int irq) |
294 | { |
295 | return irq_set_affinity_notifier(irq, NULL); |
296 | } |
297 | EXPORT_SYMBOL(irq_cpu_rmap_remove); |
298 | |
299 | /** |
300 | * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map |
301 | * @rmap: The reverse-map |
302 | * @irq: The IRQ number |
303 | * |
304 | * This adds an IRQ affinity notifier that will update the reverse-map |
305 | * automatically. |
306 | * |
307 | * Must be called in process context, after the IRQ is allocated but |
308 | * before it is bound with request_irq(). |
309 | */ |
310 | int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq) |
311 | { |
312 | struct irq_glue *glue = kzalloc(size: sizeof(*glue), GFP_KERNEL); |
313 | int rc; |
314 | |
315 | if (!glue) |
316 | return -ENOMEM; |
317 | glue->notify.notify = irq_cpu_rmap_notify; |
318 | glue->notify.release = irq_cpu_rmap_release; |
319 | glue->rmap = rmap; |
320 | cpu_rmap_get(rmap); |
321 | rc = cpu_rmap_add(rmap, glue); |
322 | if (rc < 0) |
323 | goto err_add; |
324 | |
325 | glue->index = rc; |
326 | rc = irq_set_affinity_notifier(irq, notify: &glue->notify); |
327 | if (rc) |
328 | goto err_set; |
329 | |
330 | return rc; |
331 | |
332 | err_set: |
333 | rmap->obj[glue->index] = NULL; |
334 | err_add: |
335 | cpu_rmap_put(glue->rmap); |
336 | kfree(objp: glue); |
337 | return rc; |
338 | } |
339 | EXPORT_SYMBOL(irq_cpu_rmap_add); |
340 | |