1 | /* |
2 | * This file is subject to the terms and conditions of the GNU General Public |
3 | * License. See the file "COPYING" in the main directory of this archive |
4 | * for more details. |
5 | * |
6 | * (C) Copyright 2020 Hewlett Packard Enterprise Development LP |
7 | * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. |
8 | */ |
9 | |
10 | /* |
11 | * Cross Partition Communication (XPC) partition support. |
12 | * |
13 | * This is the part of XPC that detects the presence/absence of |
14 | * other partitions. It provides a heartbeat and monitors the |
15 | * heartbeats of other partitions. |
16 | * |
17 | */ |
18 | |
19 | #include <linux/device.h> |
20 | #include <linux/hardirq.h> |
21 | #include <linux/slab.h> |
22 | #include "xpc.h" |
23 | #include <asm/uv/uv_hub.h> |
24 | |
/* flag: XPC is exiting (xpc_discovery() stops scanning once it is set) */
int xpc_exiting;

/* table of partition structures, indexed by partid */
struct xpc_partition *xpc_partitions;

/*
 * Pointers into this partition's reserved page; all three are filled in by
 * xpc_setup_rsvd_page().
 */
struct xpc_rsvd_page *xpc_rsvd_page;
unsigned long *xpc_mach_nasids;
static unsigned long *xpc_part_nasids;

/* size of a nasid mask, as established by xpc_setup_rsvd_page() */
int xpc_nasid_mask_nlongs;		/* #of longs in nasid mask */
static int xpc_nasid_mask_nbytes;	/* #of bytes in nasid mask */
37 | |
38 | /* |
39 | * Guarantee that the kmalloc'd memory is cacheline aligned. |
40 | */ |
41 | void * |
42 | xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) |
43 | { |
44 | /* see if kmalloc will give us cachline aligned memory by default */ |
45 | *base = kmalloc(size, flags); |
46 | if (*base == NULL) |
47 | return NULL; |
48 | |
49 | if ((u64)*base == L1_CACHE_ALIGN((u64)*base)) |
50 | return *base; |
51 | |
52 | kfree(objp: *base); |
53 | |
54 | /* nope, we'll have to do it ourselves */ |
55 | *base = kmalloc(size: size + L1_CACHE_BYTES, flags); |
56 | if (*base == NULL) |
57 | return NULL; |
58 | |
59 | return (void *)L1_CACHE_ALIGN((u64)*base); |
60 | } |
61 | |
62 | /* |
63 | * Given a nasid, get the physical address of the partition's reserved page |
64 | * for that nasid. This function returns 0 on any error. |
65 | */ |
66 | static unsigned long |
67 | xpc_get_rsvd_page_pa(int nasid) |
68 | { |
69 | enum xp_retval ret; |
70 | u64 cookie = 0; |
71 | unsigned long rp_pa = nasid; /* seed with nasid */ |
72 | size_t len = 0; |
73 | size_t buf_len = 0; |
74 | void *buf = NULL; |
75 | void *buf_base = NULL; |
76 | enum xp_retval (*get_partition_rsvd_page_pa) |
77 | (void *, u64 *, unsigned long *, size_t *) = |
78 | xpc_arch_ops.get_partition_rsvd_page_pa; |
79 | |
80 | while (1) { |
81 | |
82 | /* !!! rp_pa will need to be _gpa on UV. |
83 | * ??? So do we save it into the architecture specific parts |
84 | * ??? of the xpc_partition structure? Do we rename this |
85 | * ??? function or have two versions? Rename rp_pa for UV to |
86 | * ??? rp_gpa? |
87 | */ |
88 | ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len); |
89 | |
90 | dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, " |
91 | "address=0x%016lx, len=0x%016lx\n" , ret, |
92 | (unsigned long)cookie, rp_pa, len); |
93 | |
94 | if (ret != xpNeedMoreInfo) |
95 | break; |
96 | |
97 | if (len > buf_len) { |
98 | kfree(objp: buf_base); |
99 | buf_len = L1_CACHE_ALIGN(len); |
100 | buf = xpc_kmalloc_cacheline_aligned(size: buf_len, GFP_KERNEL, |
101 | base: &buf_base); |
102 | if (buf_base == NULL) { |
103 | dev_err(xpc_part, "unable to kmalloc " |
104 | "len=0x%016lx\n" , buf_len); |
105 | ret = xpNoMemory; |
106 | break; |
107 | } |
108 | } |
109 | |
110 | ret = xp_remote_memcpy(xp_pa(buf), rp_pa, len); |
111 | if (ret != xpSuccess) { |
112 | dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n" , ret); |
113 | break; |
114 | } |
115 | } |
116 | |
117 | kfree(objp: buf_base); |
118 | |
119 | if (ret != xpSuccess) |
120 | rp_pa = 0; |
121 | |
122 | dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n" , rp_pa); |
123 | return rp_pa; |
124 | } |
125 | |
126 | /* |
127 | * Fill the partition reserved page with the information needed by |
128 | * other partitions to discover we are alive and establish initial |
129 | * communications. |
130 | */ |
131 | int |
132 | xpc_setup_rsvd_page(void) |
133 | { |
134 | int ret; |
135 | struct xpc_rsvd_page *rp; |
136 | unsigned long rp_pa; |
137 | unsigned long new_ts_jiffies; |
138 | |
139 | /* get the local reserved page's address */ |
140 | |
141 | preempt_disable(); |
142 | rp_pa = xpc_get_rsvd_page_pa(nasid: xp_cpu_to_nasid(smp_processor_id())); |
143 | preempt_enable(); |
144 | if (rp_pa == 0) { |
145 | dev_err(xpc_part, "SAL failed to locate the reserved page\n" ); |
146 | return -ESRCH; |
147 | } |
148 | rp = (struct xpc_rsvd_page *)__va(xp_socket_pa(rp_pa)); |
149 | |
150 | if (rp->SAL_version < 3) { |
151 | /* SAL_versions < 3 had a SAL_partid defined as a u8 */ |
152 | rp->SAL_partid &= 0xff; |
153 | } |
154 | BUG_ON(rp->SAL_partid != xp_partition_id); |
155 | |
156 | if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) { |
157 | dev_err(xpc_part, "the reserved page's partid of %d is outside " |
158 | "supported range (< 0 || >= %d)\n" , rp->SAL_partid, |
159 | xp_max_npartitions); |
160 | return -EINVAL; |
161 | } |
162 | |
163 | rp->version = XPC_RP_VERSION; |
164 | rp->max_npartitions = xp_max_npartitions; |
165 | |
166 | /* establish the actual sizes of the nasid masks */ |
167 | if (rp->SAL_version == 1) { |
168 | /* SAL_version 1 didn't set the nasids_size field */ |
169 | rp->SAL_nasids_size = 128; |
170 | } |
171 | xpc_nasid_mask_nbytes = rp->SAL_nasids_size; |
172 | xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size * |
173 | BITS_PER_BYTE); |
174 | |
175 | /* setup the pointers to the various items in the reserved page */ |
176 | xpc_part_nasids = XPC_RP_PART_NASIDS(rp); |
177 | xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); |
178 | |
179 | ret = xpc_arch_ops.setup_rsvd_page(rp); |
180 | if (ret != 0) |
181 | return ret; |
182 | |
183 | /* |
184 | * Set timestamp of when reserved page was setup by XPC. |
185 | * This signifies to the remote partition that our reserved |
186 | * page is initialized. |
187 | */ |
188 | new_ts_jiffies = jiffies; |
189 | if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies) |
190 | new_ts_jiffies++; |
191 | rp->ts_jiffies = new_ts_jiffies; |
192 | |
193 | xpc_rsvd_page = rp; |
194 | return 0; |
195 | } |
196 | |
197 | void |
198 | xpc_teardown_rsvd_page(void) |
199 | { |
200 | /* a zero timestamp indicates our rsvd page is not initialized */ |
201 | xpc_rsvd_page->ts_jiffies = 0; |
202 | } |
203 | |
204 | /* |
205 | * Get a copy of a portion of the remote partition's rsvd page. |
206 | * |
207 | * remote_rp points to a buffer that is cacheline aligned for BTE copies and |
208 | * is large enough to contain a copy of their reserved page header and |
209 | * part_nasids mask. |
210 | */ |
211 | enum xp_retval |
212 | xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids, |
213 | struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa) |
214 | { |
215 | int l; |
216 | enum xp_retval ret; |
217 | |
218 | /* get the reserved page's physical address */ |
219 | |
220 | *remote_rp_pa = xpc_get_rsvd_page_pa(nasid); |
221 | if (*remote_rp_pa == 0) |
222 | return xpNoRsvdPageAddr; |
223 | |
224 | /* pull over the reserved page header and part_nasids mask */ |
225 | ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa, |
226 | XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes); |
227 | if (ret != xpSuccess) |
228 | return ret; |
229 | |
230 | if (discovered_nasids != NULL) { |
231 | unsigned long *remote_part_nasids = |
232 | XPC_RP_PART_NASIDS(remote_rp); |
233 | |
234 | for (l = 0; l < xpc_nasid_mask_nlongs; l++) |
235 | discovered_nasids[l] |= remote_part_nasids[l]; |
236 | } |
237 | |
238 | /* zero timestamp indicates the reserved page has not been setup */ |
239 | if (remote_rp->ts_jiffies == 0) |
240 | return xpRsvdPageNotSet; |
241 | |
242 | if (XPC_VERSION_MAJOR(remote_rp->version) != |
243 | XPC_VERSION_MAJOR(XPC_RP_VERSION)) { |
244 | return xpBadVersion; |
245 | } |
246 | |
247 | /* check that both remote and local partids are valid for each side */ |
248 | if (remote_rp->SAL_partid < 0 || |
249 | remote_rp->SAL_partid >= xp_max_npartitions || |
250 | remote_rp->max_npartitions <= xp_partition_id) { |
251 | return xpInvalidPartid; |
252 | } |
253 | |
254 | if (remote_rp->SAL_partid == xp_partition_id) |
255 | return xpLocalPartid; |
256 | |
257 | return xpSuccess; |
258 | } |
259 | |
260 | /* |
261 | * See if the other side has responded to a partition deactivate request |
262 | * from us. Though we requested the remote partition to deactivate with regard |
263 | * to us, we really only need to wait for the other side to disengage from us. |
264 | */ |
265 | static int __xpc_partition_disengaged(struct xpc_partition *part, |
266 | bool from_timer) |
267 | { |
268 | short partid = XPC_PARTID(part); |
269 | int disengaged; |
270 | |
271 | disengaged = !xpc_arch_ops.partition_engaged(partid); |
272 | if (part->disengage_timeout) { |
273 | if (!disengaged) { |
274 | if (time_is_after_jiffies(part->disengage_timeout)) { |
275 | /* timelimit hasn't been reached yet */ |
276 | return 0; |
277 | } |
278 | |
279 | /* |
280 | * Other side hasn't responded to our deactivate |
281 | * request in a timely fashion, so assume it's dead. |
282 | */ |
283 | |
284 | dev_info(xpc_part, "deactivate request to remote " |
285 | "partition %d timed out\n" , partid); |
286 | xpc_disengage_timedout = 1; |
287 | xpc_arch_ops.assume_partition_disengaged(partid); |
288 | disengaged = 1; |
289 | } |
290 | part->disengage_timeout = 0; |
291 | |
292 | /* Cancel the timer function if not called from it */ |
293 | if (!from_timer) |
294 | del_timer_sync(timer: &part->disengage_timer); |
295 | |
296 | DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING && |
297 | part->act_state != XPC_P_AS_INACTIVE); |
298 | if (part->act_state != XPC_P_AS_INACTIVE) |
299 | xpc_wakeup_channel_mgr(part); |
300 | |
301 | xpc_arch_ops.cancel_partition_deactivation_request(part); |
302 | } |
303 | return disengaged; |
304 | } |
305 | |
306 | int xpc_partition_disengaged(struct xpc_partition *part) |
307 | { |
308 | return __xpc_partition_disengaged(part, from_timer: false); |
309 | } |
310 | |
311 | int xpc_partition_disengaged_from_timer(struct xpc_partition *part) |
312 | { |
313 | return __xpc_partition_disengaged(part, from_timer: true); |
314 | } |
315 | |
316 | /* |
317 | * Mark specified partition as active. |
318 | */ |
319 | enum xp_retval |
320 | xpc_mark_partition_active(struct xpc_partition *part) |
321 | { |
322 | unsigned long irq_flags; |
323 | enum xp_retval ret; |
324 | |
325 | dev_dbg(xpc_part, "setting partition %d to ACTIVE\n" , XPC_PARTID(part)); |
326 | |
327 | spin_lock_irqsave(&part->act_lock, irq_flags); |
328 | if (part->act_state == XPC_P_AS_ACTIVATING) { |
329 | part->act_state = XPC_P_AS_ACTIVE; |
330 | ret = xpSuccess; |
331 | } else { |
332 | DBUG_ON(part->reason == xpSuccess); |
333 | ret = part->reason; |
334 | } |
335 | spin_unlock_irqrestore(lock: &part->act_lock, flags: irq_flags); |
336 | |
337 | return ret; |
338 | } |
339 | |
340 | /* |
341 | * Start the process of deactivating the specified partition. |
342 | */ |
343 | void |
344 | xpc_deactivate_partition(const int line, struct xpc_partition *part, |
345 | enum xp_retval reason) |
346 | { |
347 | unsigned long irq_flags; |
348 | |
349 | spin_lock_irqsave(&part->act_lock, irq_flags); |
350 | |
351 | if (part->act_state == XPC_P_AS_INACTIVE) { |
352 | XPC_SET_REASON(part, reason, line); |
353 | spin_unlock_irqrestore(lock: &part->act_lock, flags: irq_flags); |
354 | if (reason == xpReactivating) { |
355 | /* we interrupt ourselves to reactivate partition */ |
356 | xpc_arch_ops.request_partition_reactivation(part); |
357 | } |
358 | return; |
359 | } |
360 | if (part->act_state == XPC_P_AS_DEACTIVATING) { |
361 | if ((part->reason == xpUnloading && reason != xpUnloading) || |
362 | reason == xpReactivating) { |
363 | XPC_SET_REASON(part, reason, line); |
364 | } |
365 | spin_unlock_irqrestore(lock: &part->act_lock, flags: irq_flags); |
366 | return; |
367 | } |
368 | |
369 | part->act_state = XPC_P_AS_DEACTIVATING; |
370 | XPC_SET_REASON(part, reason, line); |
371 | |
372 | spin_unlock_irqrestore(lock: &part->act_lock, flags: irq_flags); |
373 | |
374 | /* ask remote partition to deactivate with regard to us */ |
375 | xpc_arch_ops.request_partition_deactivation(part); |
376 | |
377 | /* set a timelimit on the disengage phase of the deactivation request */ |
378 | part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ); |
379 | part->disengage_timer.expires = part->disengage_timeout; |
380 | add_timer(timer: &part->disengage_timer); |
381 | |
382 | dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n" , |
383 | XPC_PARTID(part), reason); |
384 | |
385 | xpc_partition_going_down(part, reason); |
386 | } |
387 | |
388 | /* |
389 | * Mark specified partition as inactive. |
390 | */ |
391 | void |
392 | xpc_mark_partition_inactive(struct xpc_partition *part) |
393 | { |
394 | unsigned long irq_flags; |
395 | |
396 | dev_dbg(xpc_part, "setting partition %d to INACTIVE\n" , |
397 | XPC_PARTID(part)); |
398 | |
399 | spin_lock_irqsave(&part->act_lock, irq_flags); |
400 | part->act_state = XPC_P_AS_INACTIVE; |
401 | spin_unlock_irqrestore(lock: &part->act_lock, flags: irq_flags); |
402 | part->remote_rp_pa = 0; |
403 | } |
404 | |
405 | /* |
406 | * SAL has provided a partition and machine mask. The partition mask |
407 | * contains a bit for each even nasid in our partition. The machine |
408 | * mask contains a bit for each even nasid in the entire machine. |
409 | * |
410 | * Using those two bit arrays, we can determine which nasids are |
411 | * known in the machine. Each should also have a reserved page |
412 | * initialized if they are available for partitioning. |
413 | */ |
414 | void |
415 | xpc_discovery(void) |
416 | { |
417 | void *remote_rp_base; |
418 | struct xpc_rsvd_page *remote_rp; |
419 | unsigned long remote_rp_pa; |
420 | int region; |
421 | int region_size; |
422 | int max_regions; |
423 | int nasid; |
424 | unsigned long *discovered_nasids; |
425 | enum xp_retval ret; |
426 | |
427 | remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE + |
428 | xpc_nasid_mask_nbytes, |
429 | GFP_KERNEL, base: &remote_rp_base); |
430 | if (remote_rp == NULL) |
431 | return; |
432 | |
433 | discovered_nasids = kcalloc(n: xpc_nasid_mask_nlongs, size: sizeof(long), |
434 | GFP_KERNEL); |
435 | if (discovered_nasids == NULL) { |
436 | kfree(objp: remote_rp_base); |
437 | return; |
438 | } |
439 | |
440 | /* |
441 | * The term 'region' in this context refers to the minimum number of |
442 | * nodes that can comprise an access protection grouping. The access |
443 | * protection is in regards to memory, IOI and IPI. |
444 | */ |
445 | region_size = xp_region_size; |
446 | |
447 | if (is_uv_system()) |
448 | max_regions = 256; |
449 | else { |
450 | max_regions = 64; |
451 | |
452 | switch (region_size) { |
453 | case 128: |
454 | max_regions *= 2; |
455 | fallthrough; |
456 | case 64: |
457 | max_regions *= 2; |
458 | fallthrough; |
459 | case 32: |
460 | max_regions *= 2; |
461 | region_size = 16; |
462 | } |
463 | } |
464 | |
465 | for (region = 0; region < max_regions; region++) { |
466 | |
467 | if (xpc_exiting) |
468 | break; |
469 | |
470 | dev_dbg(xpc_part, "searching region %d\n" , region); |
471 | |
472 | for (nasid = (region * region_size * 2); |
473 | nasid < ((region + 1) * region_size * 2); nasid += 2) { |
474 | |
475 | if (xpc_exiting) |
476 | break; |
477 | |
478 | dev_dbg(xpc_part, "checking nasid %d\n" , nasid); |
479 | |
480 | if (test_bit(nasid / 2, xpc_part_nasids)) { |
481 | dev_dbg(xpc_part, "PROM indicates Nasid %d is " |
482 | "part of the local partition; skipping " |
483 | "region\n" , nasid); |
484 | break; |
485 | } |
486 | |
487 | if (!(test_bit(nasid / 2, xpc_mach_nasids))) { |
488 | dev_dbg(xpc_part, "PROM indicates Nasid %d was " |
489 | "not on Numa-Link network at reset\n" , |
490 | nasid); |
491 | continue; |
492 | } |
493 | |
494 | if (test_bit(nasid / 2, discovered_nasids)) { |
495 | dev_dbg(xpc_part, "Nasid %d is part of a " |
496 | "partition which was previously " |
497 | "discovered\n" , nasid); |
498 | continue; |
499 | } |
500 | |
501 | /* pull over the rsvd page header & part_nasids mask */ |
502 | |
503 | ret = xpc_get_remote_rp(nasid, discovered_nasids, |
504 | remote_rp, remote_rp_pa: &remote_rp_pa); |
505 | if (ret != xpSuccess) { |
506 | dev_dbg(xpc_part, "unable to get reserved page " |
507 | "from nasid %d, reason=%d\n" , nasid, |
508 | ret); |
509 | |
510 | if (ret == xpLocalPartid) |
511 | break; |
512 | |
513 | continue; |
514 | } |
515 | |
516 | xpc_arch_ops.request_partition_activation(remote_rp, |
517 | remote_rp_pa, nasid); |
518 | } |
519 | } |
520 | |
521 | kfree(objp: discovered_nasids); |
522 | kfree(objp: remote_rp_base); |
523 | } |
524 | |
525 | /* |
526 | * Given a partid, get the nasids owned by that partition from the |
527 | * remote partition's reserved page. |
528 | */ |
529 | enum xp_retval |
530 | xpc_initiate_partid_to_nasids(short partid, void *nasid_mask) |
531 | { |
532 | struct xpc_partition *part; |
533 | unsigned long part_nasid_pa; |
534 | |
535 | part = &xpc_partitions[partid]; |
536 | if (part->remote_rp_pa == 0) |
537 | return xpPartitionDown; |
538 | |
539 | memset(nasid_mask, 0, xpc_nasid_mask_nbytes); |
540 | |
541 | part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa); |
542 | |
543 | return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa, |
544 | xpc_nasid_mask_nbytes); |
545 | } |
546 | |