1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Copyright (C) 2010, 2011, 2012, Lemote, Inc. |
4 | * Author: Chen Huacai, chenhc@lemote.com |
5 | */ |
6 | |
7 | #include <irq.h> |
8 | #include <linux/init.h> |
9 | #include <linux/cpu.h> |
10 | #include <linux/sched.h> |
11 | #include <linux/sched/hotplug.h> |
12 | #include <linux/sched/task_stack.h> |
13 | #include <linux/smp.h> |
14 | #include <linux/cpufreq.h> |
15 | #include <linux/kexec.h> |
16 | #include <asm/processor.h> |
17 | #include <asm/smp.h> |
18 | #include <asm/time.h> |
19 | #include <asm/tlbflush.h> |
20 | #include <asm/cacheflush.h> |
21 | #include <loongson.h> |
22 | #include <loongson_regs.h> |
23 | #include <workarounds.h> |
24 | |
25 | #include "smp.h" |
26 | |
/* Per-CPU hotplug state; polled by loongson3_cpu_die() and set by play_dead() */
DEFINE_PER_CPU(int, cpu_state);

/* IPIs arrive on MIPS CPU interrupt line 6 */
#define LS_IPI_IRQ (MIPS_CPU_IRQ_BASE + 6)

/* Legacy MMIO IPI registers, one slot per physical core (4 groups x 4 cores) */
static void __iomem *ipi_set0_regs[16];
static void __iomem *ipi_clear0_regs[16];
static void __iomem *ipi_status0_regs[16];
static void __iomem *ipi_en0_regs[16];
static void __iomem *ipi_mailbox_buf[16];
/* CPU0's c0 Count snapshot, broadcast to secondaries for clock synchronization */
static uint32_t core0_c0count[NR_CPUS];

/* IPI backend ops, bound by csr_ipi_probe() to the CSR or legacy implementation */
static u32 (*ipi_read_clear)(int cpu);
static void (*ipi_write_action)(int cpu, u32 action);
static void (*ipi_write_enable)(int cpu);
static void (*ipi_clear_buf)(int cpu);
static void (*ipi_write_buf)(int cpu, struct task_struct *idle);
43 | |
44 | /* send mail via Mail_Send register for 3A4000+ CPU */ |
45 | static void csr_mail_send(uint64_t data, int cpu, int mailbox) |
46 | { |
47 | uint64_t val; |
48 | |
49 | /* send high 32 bits */ |
50 | val = CSR_MAIL_SEND_BLOCK; |
51 | val |= (CSR_MAIL_SEND_BOX_HIGH(mailbox) << CSR_MAIL_SEND_BOX_SHIFT); |
52 | val |= (cpu << CSR_MAIL_SEND_CPU_SHIFT); |
53 | val |= (data & CSR_MAIL_SEND_H32_MASK); |
54 | csr_writeq(val, LOONGSON_CSR_MAIL_SEND); |
55 | |
56 | /* send low 32 bits */ |
57 | val = CSR_MAIL_SEND_BLOCK; |
58 | val |= (CSR_MAIL_SEND_BOX_LOW(mailbox) << CSR_MAIL_SEND_BOX_SHIFT); |
59 | val |= (cpu << CSR_MAIL_SEND_CPU_SHIFT); |
60 | val |= (data << CSR_MAIL_SEND_BUF_SHIFT); |
61 | csr_writeq(val, LOONGSON_CSR_MAIL_SEND); |
62 | }; |
63 | |
64 | static u32 csr_ipi_read_clear(int cpu) |
65 | { |
66 | u32 action; |
67 | |
68 | /* Load the ipi register to figure out what we're supposed to do */ |
69 | action = csr_readl(LOONGSON_CSR_IPI_STATUS); |
70 | /* Clear the ipi register to clear the interrupt */ |
71 | csr_writel(action, LOONGSON_CSR_IPI_CLEAR); |
72 | |
73 | return action; |
74 | } |
75 | |
76 | static void csr_ipi_write_action(int cpu, u32 action) |
77 | { |
78 | unsigned int irq = 0; |
79 | |
80 | while ((irq = ffs(action))) { |
81 | uint32_t val = CSR_IPI_SEND_BLOCK; |
82 | val |= (irq - 1); |
83 | val |= (cpu << CSR_IPI_SEND_CPU_SHIFT); |
84 | csr_writel(val, LOONGSON_CSR_IPI_SEND); |
85 | action &= ~BIT(irq - 1); |
86 | } |
87 | } |
88 | |
89 | static void csr_ipi_write_enable(int cpu) |
90 | { |
91 | csr_writel(0xffffffff, LOONGSON_CSR_IPI_EN); |
92 | } |
93 | |
94 | static void csr_ipi_clear_buf(int cpu) |
95 | { |
96 | csr_writeq(0, LOONGSON_CSR_MAIL_BUF0); |
97 | } |
98 | |
99 | static void csr_ipi_write_buf(int cpu, struct task_struct *idle) |
100 | { |
101 | unsigned long startargs[4]; |
102 | |
103 | /* startargs[] are initial PC, SP and GP for secondary CPU */ |
104 | startargs[0] = (unsigned long)&smp_bootstrap; |
105 | startargs[1] = (unsigned long)__KSTK_TOS(idle); |
106 | startargs[2] = (unsigned long)task_thread_info(idle); |
107 | startargs[3] = 0; |
108 | |
109 | pr_debug("CPU#%d, func_pc=%lx, sp=%lx, gp=%lx\n" , |
110 | cpu, startargs[0], startargs[1], startargs[2]); |
111 | |
112 | csr_mail_send(data: startargs[3], cpu: cpu_logical_map(cpu), mailbox: 3); |
113 | csr_mail_send(data: startargs[2], cpu: cpu_logical_map(cpu), mailbox: 2); |
114 | csr_mail_send(data: startargs[1], cpu: cpu_logical_map(cpu), mailbox: 1); |
115 | csr_mail_send(data: startargs[0], cpu: cpu_logical_map(cpu), mailbox: 0); |
116 | } |
117 | |
118 | static u32 legacy_ipi_read_clear(int cpu) |
119 | { |
120 | u32 action; |
121 | |
122 | /* Load the ipi register to figure out what we're supposed to do */ |
123 | action = readl_relaxed(ipi_status0_regs[cpu_logical_map(cpu)]); |
124 | /* Clear the ipi register to clear the interrupt */ |
125 | writel_relaxed(action, ipi_clear0_regs[cpu_logical_map(cpu)]); |
126 | nudge_writes(); |
127 | |
128 | return action; |
129 | } |
130 | |
131 | static void legacy_ipi_write_action(int cpu, u32 action) |
132 | { |
133 | writel_relaxed((u32)action, ipi_set0_regs[cpu]); |
134 | nudge_writes(); |
135 | } |
136 | |
137 | static void legacy_ipi_write_enable(int cpu) |
138 | { |
139 | writel_relaxed(0xffffffff, ipi_en0_regs[cpu_logical_map(cpu)]); |
140 | } |
141 | |
142 | static void legacy_ipi_clear_buf(int cpu) |
143 | { |
144 | writeq_relaxed(0, ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x0); |
145 | } |
146 | |
147 | static void legacy_ipi_write_buf(int cpu, struct task_struct *idle) |
148 | { |
149 | unsigned long startargs[4]; |
150 | |
151 | /* startargs[] are initial PC, SP and GP for secondary CPU */ |
152 | startargs[0] = (unsigned long)&smp_bootstrap; |
153 | startargs[1] = (unsigned long)__KSTK_TOS(idle); |
154 | startargs[2] = (unsigned long)task_thread_info(idle); |
155 | startargs[3] = 0; |
156 | |
157 | pr_debug("CPU#%d, func_pc=%lx, sp=%lx, gp=%lx\n" , |
158 | cpu, startargs[0], startargs[1], startargs[2]); |
159 | |
160 | writeq_relaxed(startargs[3], |
161 | ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x18); |
162 | writeq_relaxed(startargs[2], |
163 | ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x10); |
164 | writeq_relaxed(startargs[1], |
165 | ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x8); |
166 | writeq_relaxed(startargs[0], |
167 | ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x0); |
168 | nudge_writes(); |
169 | } |
170 | |
171 | static void csr_ipi_probe(void) |
172 | { |
173 | if (cpu_has_csr() && csr_readl(LOONGSON_CSR_FEATURES) & LOONGSON_CSRF_IPI) { |
174 | ipi_read_clear = csr_ipi_read_clear; |
175 | ipi_write_action = csr_ipi_write_action; |
176 | ipi_write_enable = csr_ipi_write_enable; |
177 | ipi_clear_buf = csr_ipi_clear_buf; |
178 | ipi_write_buf = csr_ipi_write_buf; |
179 | } else { |
180 | ipi_read_clear = legacy_ipi_read_clear; |
181 | ipi_write_action = legacy_ipi_write_action; |
182 | ipi_write_enable = legacy_ipi_write_enable; |
183 | ipi_clear_buf = legacy_ipi_clear_buf; |
184 | ipi_write_buf = legacy_ipi_write_buf; |
185 | } |
186 | } |
187 | |
188 | static void ipi_set0_regs_init(void) |
189 | { |
190 | ipi_set0_regs[0] = (void __iomem *) |
191 | (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + SET0); |
192 | ipi_set0_regs[1] = (void __iomem *) |
193 | (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + SET0); |
194 | ipi_set0_regs[2] = (void __iomem *) |
195 | (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + SET0); |
196 | ipi_set0_regs[3] = (void __iomem *) |
197 | (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + SET0); |
198 | ipi_set0_regs[4] = (void __iomem *) |
199 | (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + SET0); |
200 | ipi_set0_regs[5] = (void __iomem *) |
201 | (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + SET0); |
202 | ipi_set0_regs[6] = (void __iomem *) |
203 | (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + SET0); |
204 | ipi_set0_regs[7] = (void __iomem *) |
205 | (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + SET0); |
206 | ipi_set0_regs[8] = (void __iomem *) |
207 | (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + SET0); |
208 | ipi_set0_regs[9] = (void __iomem *) |
209 | (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + SET0); |
210 | ipi_set0_regs[10] = (void __iomem *) |
211 | (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + SET0); |
212 | ipi_set0_regs[11] = (void __iomem *) |
213 | (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + SET0); |
214 | ipi_set0_regs[12] = (void __iomem *) |
215 | (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + SET0); |
216 | ipi_set0_regs[13] = (void __iomem *) |
217 | (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + SET0); |
218 | ipi_set0_regs[14] = (void __iomem *) |
219 | (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + SET0); |
220 | ipi_set0_regs[15] = (void __iomem *) |
221 | (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + SET0); |
222 | } |
223 | |
224 | static void ipi_clear0_regs_init(void) |
225 | { |
226 | ipi_clear0_regs[0] = (void __iomem *) |
227 | (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + CLEAR0); |
228 | ipi_clear0_regs[1] = (void __iomem *) |
229 | (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + CLEAR0); |
230 | ipi_clear0_regs[2] = (void __iomem *) |
231 | (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + CLEAR0); |
232 | ipi_clear0_regs[3] = (void __iomem *) |
233 | (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + CLEAR0); |
234 | ipi_clear0_regs[4] = (void __iomem *) |
235 | (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + CLEAR0); |
236 | ipi_clear0_regs[5] = (void __iomem *) |
237 | (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + CLEAR0); |
238 | ipi_clear0_regs[6] = (void __iomem *) |
239 | (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + CLEAR0); |
240 | ipi_clear0_regs[7] = (void __iomem *) |
241 | (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + CLEAR0); |
242 | ipi_clear0_regs[8] = (void __iomem *) |
243 | (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + CLEAR0); |
244 | ipi_clear0_regs[9] = (void __iomem *) |
245 | (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + CLEAR0); |
246 | ipi_clear0_regs[10] = (void __iomem *) |
247 | (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + CLEAR0); |
248 | ipi_clear0_regs[11] = (void __iomem *) |
249 | (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + CLEAR0); |
250 | ipi_clear0_regs[12] = (void __iomem *) |
251 | (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + CLEAR0); |
252 | ipi_clear0_regs[13] = (void __iomem *) |
253 | (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + CLEAR0); |
254 | ipi_clear0_regs[14] = (void __iomem *) |
255 | (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + CLEAR0); |
256 | ipi_clear0_regs[15] = (void __iomem *) |
257 | (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + CLEAR0); |
258 | } |
259 | |
260 | static void ipi_status0_regs_init(void) |
261 | { |
262 | ipi_status0_regs[0] = (void __iomem *) |
263 | (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + STATUS0); |
264 | ipi_status0_regs[1] = (void __iomem *) |
265 | (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + STATUS0); |
266 | ipi_status0_regs[2] = (void __iomem *) |
267 | (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + STATUS0); |
268 | ipi_status0_regs[3] = (void __iomem *) |
269 | (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + STATUS0); |
270 | ipi_status0_regs[4] = (void __iomem *) |
271 | (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + STATUS0); |
272 | ipi_status0_regs[5] = (void __iomem *) |
273 | (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + STATUS0); |
274 | ipi_status0_regs[6] = (void __iomem *) |
275 | (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + STATUS0); |
276 | ipi_status0_regs[7] = (void __iomem *) |
277 | (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + STATUS0); |
278 | ipi_status0_regs[8] = (void __iomem *) |
279 | (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + STATUS0); |
280 | ipi_status0_regs[9] = (void __iomem *) |
281 | (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + STATUS0); |
282 | ipi_status0_regs[10] = (void __iomem *) |
283 | (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + STATUS0); |
284 | ipi_status0_regs[11] = (void __iomem *) |
285 | (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + STATUS0); |
286 | ipi_status0_regs[12] = (void __iomem *) |
287 | (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + STATUS0); |
288 | ipi_status0_regs[13] = (void __iomem *) |
289 | (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + STATUS0); |
290 | ipi_status0_regs[14] = (void __iomem *) |
291 | (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + STATUS0); |
292 | ipi_status0_regs[15] = (void __iomem *) |
293 | (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + STATUS0); |
294 | } |
295 | |
296 | static void ipi_en0_regs_init(void) |
297 | { |
298 | ipi_en0_regs[0] = (void __iomem *) |
299 | (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + EN0); |
300 | ipi_en0_regs[1] = (void __iomem *) |
301 | (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + EN0); |
302 | ipi_en0_regs[2] = (void __iomem *) |
303 | (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + EN0); |
304 | ipi_en0_regs[3] = (void __iomem *) |
305 | (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + EN0); |
306 | ipi_en0_regs[4] = (void __iomem *) |
307 | (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + EN0); |
308 | ipi_en0_regs[5] = (void __iomem *) |
309 | (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + EN0); |
310 | ipi_en0_regs[6] = (void __iomem *) |
311 | (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + EN0); |
312 | ipi_en0_regs[7] = (void __iomem *) |
313 | (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + EN0); |
314 | ipi_en0_regs[8] = (void __iomem *) |
315 | (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + EN0); |
316 | ipi_en0_regs[9] = (void __iomem *) |
317 | (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + EN0); |
318 | ipi_en0_regs[10] = (void __iomem *) |
319 | (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + EN0); |
320 | ipi_en0_regs[11] = (void __iomem *) |
321 | (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + EN0); |
322 | ipi_en0_regs[12] = (void __iomem *) |
323 | (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + EN0); |
324 | ipi_en0_regs[13] = (void __iomem *) |
325 | (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + EN0); |
326 | ipi_en0_regs[14] = (void __iomem *) |
327 | (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + EN0); |
328 | ipi_en0_regs[15] = (void __iomem *) |
329 | (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + EN0); |
330 | } |
331 | |
332 | static void ipi_mailbox_buf_init(void) |
333 | { |
334 | ipi_mailbox_buf[0] = (void __iomem *) |
335 | (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + BUF); |
336 | ipi_mailbox_buf[1] = (void __iomem *) |
337 | (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + BUF); |
338 | ipi_mailbox_buf[2] = (void __iomem *) |
339 | (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + BUF); |
340 | ipi_mailbox_buf[3] = (void __iomem *) |
341 | (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + BUF); |
342 | ipi_mailbox_buf[4] = (void __iomem *) |
343 | (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + BUF); |
344 | ipi_mailbox_buf[5] = (void __iomem *) |
345 | (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + BUF); |
346 | ipi_mailbox_buf[6] = (void __iomem *) |
347 | (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + BUF); |
348 | ipi_mailbox_buf[7] = (void __iomem *) |
349 | (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + BUF); |
350 | ipi_mailbox_buf[8] = (void __iomem *) |
351 | (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + BUF); |
352 | ipi_mailbox_buf[9] = (void __iomem *) |
353 | (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + BUF); |
354 | ipi_mailbox_buf[10] = (void __iomem *) |
355 | (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + BUF); |
356 | ipi_mailbox_buf[11] = (void __iomem *) |
357 | (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + BUF); |
358 | ipi_mailbox_buf[12] = (void __iomem *) |
359 | (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + BUF); |
360 | ipi_mailbox_buf[13] = (void __iomem *) |
361 | (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + BUF); |
362 | ipi_mailbox_buf[14] = (void __iomem *) |
363 | (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + BUF); |
364 | ipi_mailbox_buf[15] = (void __iomem *) |
365 | (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + BUF); |
366 | } |
367 | |
368 | /* |
369 | * Simple enough, just poke the appropriate ipi register |
370 | */ |
371 | static void loongson3_send_ipi_single(int cpu, unsigned int action) |
372 | { |
373 | ipi_write_action(cpu_logical_map(cpu), (u32)action); |
374 | } |
375 | |
376 | static void |
377 | loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) |
378 | { |
379 | unsigned int i; |
380 | |
381 | for_each_cpu(i, mask) |
382 | ipi_write_action(cpu_logical_map(i), (u32)action); |
383 | } |
384 | |
385 | |
/*
 * IPI handler: read-and-acknowledge this CPU's pending action bits,
 * then dispatch each requested action.
 */
static irqreturn_t loongson3_ipi_interrupt(int irq, void *dev_id)
{
	int i, cpu = smp_processor_id();
	unsigned int action, c0count;

	action = ipi_read_clear(cpu);

	if (action & SMP_RESCHEDULE_YOURSELF)
		scheduler_ipi();

	if (action & SMP_CALL_FUNCTION) {
		irq_enter();
		generic_smp_call_function_interrupt();
		irq_exit();
	}

	if (action & SMP_ASK_C0COUNT) {
		/* Only CPU0 is ever asked (see loongson3_init_secondary) */
		BUG_ON(cpu != 0);
		c0count = read_c0_count();
		/* 0 means "no answer yet" to the polling CPU, so never publish 0 */
		c0count = c0count ? c0count : 1;
		for (i = 1; i < nr_cpu_ids; i++)
			core0_c0count[i] = c0count;
		nudge_writes(); /* Let others see the result ASAP */
	}

	return IRQ_HANDLED;
}
413 | |
414 | #define MAX_LOOPS 800 |
415 | /* |
416 | * SMP init and finish on secondary CPUs |
417 | */ |
/*
 * Early bring-up on a secondary CPU: set the interrupt mask, enable the
 * IPI backend, record core/package topology, and synchronize this CPU's
 * c0 Count register with CPU0's.
 */
static void loongson3_init_secondary(void)
{
	int i;
	uint32_t initcount;
	unsigned int cpu = smp_processor_id();
	unsigned int imask = STATUSF_IP7 | STATUSF_IP6 |
		STATUSF_IP3 | STATUSF_IP2;

	/* Set interrupt mask, but don't enable */
	change_c0_status(ST0_IM, imask);
	ipi_write_enable(cpu);

	per_cpu(cpu_state, cpu) = CPU_ONLINE;
	cpu_set_core(&cpu_data[cpu],
		     cpu_logical_map(cpu) % loongson_sysconf.cores_per_package);
	cpu_data[cpu].package =
		cpu_logical_map(cpu) / loongson_sysconf.cores_per_package;

	/* Ask CPU0 for its Count, busy-waiting for the reply; i counts the
	 * polling iterations so the round-trip latency can be compensated. */
	i = 0;
	core0_c0count[cpu] = 0;
	loongson3_send_ipi_single(0, SMP_ASK_C0COUNT);
	while (!core0_c0count[cpu]) {
		i++;
		cpu_relax();
	}

	/* Clamp i, then add a latency estimate: full loop count for remote
	 * packages, half for the local package (see comment below). */
	if (i > MAX_LOOPS)
		i = MAX_LOOPS;
	if (cpu_data[cpu].package)
		initcount = core0_c0count[cpu] + i;
	else /* Local access is faster for loops */
		initcount = core0_c0count[cpu] + i/2;

	write_c0_count(initcount);
}
453 | |
454 | static void loongson3_smp_finish(void) |
455 | { |
456 | int cpu = smp_processor_id(); |
457 | |
458 | write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ); |
459 | local_irq_enable(); |
460 | ipi_clear_buf(cpu); |
461 | |
462 | pr_info("CPU#%d finished, CP0_ST=%x\n" , |
463 | smp_processor_id(), read_c0_status()); |
464 | } |
465 | |
466 | static void __init loongson3_smp_setup(void) |
467 | { |
468 | int i = 0, num = 0; /* i: physical id, num: logical id */ |
469 | |
470 | init_cpu_possible(cpu_none_mask); |
471 | |
472 | /* For unified kernel, NR_CPUS is the maximum possible value, |
473 | * loongson_sysconf.nr_cpus is the really present value |
474 | */ |
475 | while (i < loongson_sysconf.nr_cpus) { |
476 | if (loongson_sysconf.reserved_cpus_mask & (1<<i)) { |
477 | /* Reserved physical CPU cores */ |
478 | __cpu_number_map[i] = -1; |
479 | } else { |
480 | __cpu_number_map[i] = num; |
481 | __cpu_logical_map[num] = i; |
482 | set_cpu_possible(cpu: num, possible: true); |
483 | /* Loongson processors are always grouped by 4 */ |
484 | cpu_set_cluster(&cpu_data[num], i / 4); |
485 | num++; |
486 | } |
487 | i++; |
488 | } |
489 | pr_info("Detected %i available CPU(s)\n" , num); |
490 | |
491 | while (num < loongson_sysconf.nr_cpus) { |
492 | __cpu_logical_map[num] = -1; |
493 | num++; |
494 | } |
495 | |
496 | csr_ipi_probe(); |
497 | ipi_set0_regs_init(); |
498 | ipi_clear0_regs_init(); |
499 | ipi_status0_regs_init(); |
500 | ipi_en0_regs_init(); |
501 | ipi_mailbox_buf_init(); |
502 | ipi_write_enable(0); |
503 | |
504 | cpu_set_core(&cpu_data[0], |
505 | cpu_logical_map(0) % loongson_sysconf.cores_per_package); |
506 | cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package; |
507 | } |
508 | |
509 | static void __init loongson3_prepare_cpus(unsigned int max_cpus) |
510 | { |
511 | if (request_irq(LS_IPI_IRQ, loongson3_ipi_interrupt, |
512 | IRQF_PERCPU | IRQF_NO_SUSPEND, "SMP_IPI" , NULL)) |
513 | pr_err("Failed to request IPI IRQ\n" ); |
514 | init_cpu_present(cpu_possible_mask); |
515 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; |
516 | } |
517 | |
518 | /* |
519 | * Setup the PC, SP, and GP of a secondary processor and start it running! |
520 | */ |
521 | static int loongson3_boot_secondary(int cpu, struct task_struct *idle) |
522 | { |
523 | pr_info("Booting CPU#%d...\n" , cpu); |
524 | |
525 | ipi_write_buf(cpu, idle); |
526 | |
527 | return 0; |
528 | } |
529 | |
530 | #ifdef CONFIG_HOTPLUG_CPU |
531 | |
532 | static int loongson3_cpu_disable(void) |
533 | { |
534 | unsigned long flags; |
535 | unsigned int cpu = smp_processor_id(); |
536 | |
537 | set_cpu_online(cpu, online: false); |
538 | calculate_cpu_foreign_map(); |
539 | local_irq_save(flags); |
540 | clear_c0_status(ST0_IM); |
541 | local_irq_restore(flags); |
542 | local_flush_tlb_all(); |
543 | |
544 | return 0; |
545 | } |
546 | |
547 | |
548 | static void loongson3_cpu_die(unsigned int cpu) |
549 | { |
550 | while (per_cpu(cpu_state, cpu) != CPU_DEAD) |
551 | cpu_relax(); |
552 | |
553 | mb(); |
554 | } |
555 | |
556 | /* To shutdown a core in Loongson 3, the target core should go to CKSEG1 and |
557 | * flush all L1 entries at first. Then, another core (usually Core 0) can |
558 | * safely disable the clock of the target core. loongson3_play_dead() is |
559 | * called via CKSEG1 (uncached and unmmaped) |
560 | */ |
static void loongson3_type1_play_dead(int *state_addr)
{
	register int val;
	register long cpuid, core, node, count;
	register void *addr, *base, *initfunc;

	/* Step 1: flush the L1 I- and D-caches by index, then store
	 * CPU_DEAD (0x7) to *state_addr (and flush that line) so
	 * loongson3_cpu_die() on another CPU sees the transition. */
	__asm__ __volatile__(
		" .set push \n"
		" .set noreorder \n"
		" li %[addr], 0x80000000 \n" /* KSEG0 */
		"1: cache 0, 0(%[addr]) \n" /* flush L1 ICache */
		" cache 0, 1(%[addr]) \n"
		" cache 0, 2(%[addr]) \n"
		" cache 0, 3(%[addr]) \n"
		" cache 1, 0(%[addr]) \n" /* flush L1 DCache */
		" cache 1, 1(%[addr]) \n"
		" cache 1, 2(%[addr]) \n"
		" cache 1, 3(%[addr]) \n"
		" addiu %[sets], %[sets], -1 \n"
		" bnez %[sets], 1b \n"
		" addiu %[addr], %[addr], 0x20 \n"
		" li %[val], 0x7 \n" /* *state_addr = CPU_DEAD; */
		" sw %[val], (%[state_addr]) \n"
		" sync \n"
		" cache 21, (%[state_addr]) \n" /* flush entry of *state_addr */
		" .set pop \n"
		: [addr] "=&r" (addr), [val] "=&r" (val)
		: [state_addr] "r" (state_addr),
		  [sets] "r" (cpu_data[smp_processor_id()].dcache.sets));

	/* Step 2: derive this core's mailbox base from CP0 $15 sel 1
	 * (presumably the cpunum field — core in bits 1:0, node in 3:2),
	 * then spin (rate-limited) until mailbox 0x20 holds a nonzero PC;
	 * load SP/GP/a1 from the other mailboxes and jump — never returns. */
	__asm__ __volatile__(
		" .set push \n"
		" .set noreorder \n"
		" .set mips64 \n"
		" mfc0 %[cpuid], $15, 1 \n"
		" andi %[cpuid], 0x3ff \n"
		" dli %[base], 0x900000003ff01000 \n"
		" andi %[core], %[cpuid], 0x3 \n"
		" sll %[core], 8 \n" /* get core id */
		" or %[base], %[base], %[core] \n"
		" andi %[node], %[cpuid], 0xc \n"
		" dsll %[node], 42 \n" /* get node id */
		" or %[base], %[base], %[node] \n"
		"1: li %[count], 0x100 \n" /* wait for init loop */
		"2: bnez %[count], 2b \n" /* limit mailbox access */
		" addiu %[count], -1 \n"
		" ld %[initfunc], 0x20(%[base]) \n" /* get PC via mailbox */
		" beqz %[initfunc], 1b \n"
		" nop \n"
		" ld $sp, 0x28(%[base]) \n" /* get SP via mailbox */
		" ld $gp, 0x30(%[base]) \n" /* get GP via mailbox */
		" ld $a1, 0x38(%[base]) \n"
		" jr %[initfunc] \n" /* jump to initial PC */
		" nop \n"
		" .set pop \n"
		: [core] "=&r" (core), [node] "=&r" (node),
		  [base] "=&r" (base), [cpuid] "=&r" (cpuid),
		  [count] "=&r" (count), [initfunc] "=&r" (initfunc)
		: /* No Input */
		: "a1" );
}
622 | |
/* Loongson-3B variant: like type1 but folds extra node bits (15:14) into
 * the mailbox base address. */
static void loongson3_type2_play_dead(int *state_addr)
{
	register int val;
	register long cpuid, core, node, count;
	register void *addr, *base, *initfunc;

	/* Step 1: flush the L1 I- and D-caches by index, then store
	 * CPU_DEAD (0x7) to *state_addr so loongson3_cpu_die() sees it. */
	__asm__ __volatile__(
		" .set push \n"
		" .set noreorder \n"
		" li %[addr], 0x80000000 \n" /* KSEG0 */
		"1: cache 0, 0(%[addr]) \n" /* flush L1 ICache */
		" cache 0, 1(%[addr]) \n"
		" cache 0, 2(%[addr]) \n"
		" cache 0, 3(%[addr]) \n"
		" cache 1, 0(%[addr]) \n" /* flush L1 DCache */
		" cache 1, 1(%[addr]) \n"
		" cache 1, 2(%[addr]) \n"
		" cache 1, 3(%[addr]) \n"
		" addiu %[sets], %[sets], -1 \n"
		" bnez %[sets], 1b \n"
		" addiu %[addr], %[addr], 0x20 \n"
		" li %[val], 0x7 \n" /* *state_addr = CPU_DEAD; */
		" sw %[val], (%[state_addr]) \n"
		" sync \n"
		" cache 21, (%[state_addr]) \n" /* flush entry of *state_addr */
		" .set pop \n"
		: [addr] "=&r" (addr), [val] "=&r" (val)
		: [state_addr] "r" (state_addr),
		  [sets] "r" (cpu_data[smp_processor_id()].dcache.sets));

	/* Step 2: compute the mailbox base (with the extra 15:14 node bits),
	 * poll mailbox 0x20 for a nonzero PC, then load SP/GP/a1 and jump. */
	__asm__ __volatile__(
		" .set push \n"
		" .set noreorder \n"
		" .set mips64 \n"
		" mfc0 %[cpuid], $15, 1 \n"
		" andi %[cpuid], 0x3ff \n"
		" dli %[base], 0x900000003ff01000 \n"
		" andi %[core], %[cpuid], 0x3 \n"
		" sll %[core], 8 \n" /* get core id */
		" or %[base], %[base], %[core] \n"
		" andi %[node], %[cpuid], 0xc \n"
		" dsll %[node], 42 \n" /* get node id */
		" or %[base], %[base], %[node] \n"
		" dsrl %[node], 30 \n" /* 15:14 */
		" or %[base], %[base], %[node] \n"
		"1: li %[count], 0x100 \n" /* wait for init loop */
		"2: bnez %[count], 2b \n" /* limit mailbox access */
		" addiu %[count], -1 \n"
		" ld %[initfunc], 0x20(%[base]) \n" /* get PC via mailbox */
		" beqz %[initfunc], 1b \n"
		" nop \n"
		" ld $sp, 0x28(%[base]) \n" /* get SP via mailbox */
		" ld $gp, 0x30(%[base]) \n" /* get GP via mailbox */
		" ld $a1, 0x38(%[base]) \n"
		" jr %[initfunc] \n" /* jump to initial PC */
		" nop \n"
		" .set pop \n"
		: [core] "=&r" (core), [node] "=&r" (node),
		  [base] "=&r" (base), [cpuid] "=&r" (cpuid),
		  [count] "=&r" (count), [initfunc] "=&r" (initfunc)
		: /* No Input */
		: "a1" );
}
686 | |
/* Loongson-3A R2+/64G variant: additionally flushes the L1 VCache, and
 * polls only the low 32 bits of the mailbox as the "go" indicator. */
static void loongson3_type3_play_dead(int *state_addr)
{
	register int val;
	register long cpuid, core, node, count;
	register void *addr, *base, *initfunc;

	/* Step 1: flush L1 I/D caches, then the L1 VCache, then store
	 * CPU_DEAD (0x7) to *state_addr so loongson3_cpu_die() sees it. */
	__asm__ __volatile__(
		" .set push \n"
		" .set noreorder \n"
		" li %[addr], 0x80000000 \n" /* KSEG0 */
		"1: cache 0, 0(%[addr]) \n" /* flush L1 ICache */
		" cache 0, 1(%[addr]) \n"
		" cache 0, 2(%[addr]) \n"
		" cache 0, 3(%[addr]) \n"
		" cache 1, 0(%[addr]) \n" /* flush L1 DCache */
		" cache 1, 1(%[addr]) \n"
		" cache 1, 2(%[addr]) \n"
		" cache 1, 3(%[addr]) \n"
		" addiu %[sets], %[sets], -1 \n"
		" bnez %[sets], 1b \n"
		" addiu %[addr], %[addr], 0x40 \n"
		" li %[addr], 0x80000000 \n" /* KSEG0 */
		"2: cache 2, 0(%[addr]) \n" /* flush L1 VCache */
		" cache 2, 1(%[addr]) \n"
		" cache 2, 2(%[addr]) \n"
		" cache 2, 3(%[addr]) \n"
		" cache 2, 4(%[addr]) \n"
		" cache 2, 5(%[addr]) \n"
		" cache 2, 6(%[addr]) \n"
		" cache 2, 7(%[addr]) \n"
		" cache 2, 8(%[addr]) \n"
		" cache 2, 9(%[addr]) \n"
		" cache 2, 10(%[addr]) \n"
		" cache 2, 11(%[addr]) \n"
		" cache 2, 12(%[addr]) \n"
		" cache 2, 13(%[addr]) \n"
		" cache 2, 14(%[addr]) \n"
		" cache 2, 15(%[addr]) \n"
		" addiu %[vsets], %[vsets], -1 \n"
		" bnez %[vsets], 2b \n"
		" addiu %[addr], %[addr], 0x40 \n"
		" li %[val], 0x7 \n" /* *state_addr = CPU_DEAD; */
		" sw %[val], (%[state_addr]) \n"
		" sync \n"
		" cache 21, (%[state_addr]) \n" /* flush entry of *state_addr */
		" .set pop \n"
		: [addr] "=&r" (addr), [val] "=&r" (val)
		: [state_addr] "r" (state_addr),
		  [sets] "r" (cpu_data[smp_processor_id()].dcache.sets),
		  [vsets] "r" (cpu_data[smp_processor_id()].vcache.sets));

	/* Step 2: compute the mailbox base, poll the mailbox's low word as
	 * the ready flag, then reload the full 64-bit PC plus SP/GP/a1 and
	 * jump — never returns. */
	__asm__ __volatile__(
		" .set push \n"
		" .set noreorder \n"
		" .set mips64 \n"
		" mfc0 %[cpuid], $15, 1 \n"
		" andi %[cpuid], 0x3ff \n"
		" dli %[base], 0x900000003ff01000 \n"
		" andi %[core], %[cpuid], 0x3 \n"
		" sll %[core], 8 \n" /* get core id */
		" or %[base], %[base], %[core] \n"
		" andi %[node], %[cpuid], 0xc \n"
		" dsll %[node], 42 \n" /* get node id */
		" or %[base], %[base], %[node] \n"
		"1: li %[count], 0x100 \n" /* wait for init loop */
		"2: bnez %[count], 2b \n" /* limit mailbox access */
		" addiu %[count], -1 \n"
		" lw %[initfunc], 0x20(%[base]) \n" /* check lower 32-bit as jump indicator */
		" beqz %[initfunc], 1b \n"
		" nop \n"
		" ld %[initfunc], 0x20(%[base]) \n" /* get PC (whole 64-bit) via mailbox */
		" ld $sp, 0x28(%[base]) \n" /* get SP via mailbox */
		" ld $gp, 0x30(%[base]) \n" /* get GP via mailbox */
		" ld $a1, 0x38(%[base]) \n"
		" jr %[initfunc] \n" /* jump to initial PC */
		" nop \n"
		" .set pop \n"
		: [core] "=&r" (core), [node] "=&r" (node),
		  [base] "=&r" (base), [cpuid] "=&r" (cpuid),
		  [count] "=&r" (count), [initfunc] "=&r" (initfunc)
		: /* No Input */
		: "a1" );
}
770 | |
/*
 * Last code executed by an offlined CPU.  Picks the PRID-appropriate
 * play_dead routine and calls it through CKSEG1 (uncached), since the
 * routine flushes the L1 caches and must not refetch through them.
 * Never returns: the routine eventually jumps to a new boot PC.
 */
void play_dead(void)
{
	int prid_imp, prid_rev, *state_addr;
	unsigned int cpu = smp_processor_id();
	void (*play_dead_at_ckseg1)(int *);

	idle_task_exit();
	cpuhp_ap_report_dead();

	prid_imp = read_c0_prid() & PRID_IMP_MASK;
	prid_rev = read_c0_prid() & PRID_REV_MASK;

	/* Loongson 64G parts always take the type3 (VCache-flushing) path */
	if (prid_imp == PRID_IMP_LOONGSON_64G) {
		play_dead_at_ckseg1 =
			(void *)CKSEG1ADDR((unsigned long)loongson3_type3_play_dead);
		goto out;
	}

	/* Otherwise select by revision: 3A R1 -> type1, 3B -> type2,
	 * 3A R2/R3 -> type3 */
	switch (prid_rev) {
	case PRID_REV_LOONGSON3A_R1:
	default:
		play_dead_at_ckseg1 =
			(void *)CKSEG1ADDR((unsigned long)loongson3_type1_play_dead);
		break;
	case PRID_REV_LOONGSON3B_R1:
	case PRID_REV_LOONGSON3B_R2:
		play_dead_at_ckseg1 =
			(void *)CKSEG1ADDR((unsigned long)loongson3_type2_play_dead);
		break;
	case PRID_REV_LOONGSON3A_R2_0:
	case PRID_REV_LOONGSON3A_R2_1:
	case PRID_REV_LOONGSON3A_R3_0:
	case PRID_REV_LOONGSON3A_R3_1:
		play_dead_at_ckseg1 =
			(void *)CKSEG1ADDR((unsigned long)loongson3_type3_play_dead);
		break;
	}

out:
	state_addr = &per_cpu(cpu_state, cpu);
	mb();
	play_dead_at_ckseg1(state_addr);
	BUG();	/* the routine jumps away; falling through is a bug */
}
815 | |
816 | static int loongson3_disable_clock(unsigned int cpu) |
817 | { |
818 | uint64_t core_id = cpu_core(&cpu_data[cpu]); |
819 | uint64_t package_id = cpu_data[cpu].package; |
820 | |
821 | if ((read_c0_prid() & PRID_REV_MASK) == PRID_REV_LOONGSON3A_R1) { |
822 | LOONGSON_CHIPCFG(package_id) &= ~(1 << (12 + core_id)); |
823 | } else { |
824 | if (!(loongson_sysconf.workarounds & WORKAROUND_CPUHOTPLUG)) |
825 | LOONGSON_FREQCTRL(package_id) &= ~(1 << (core_id * 4 + 3)); |
826 | } |
827 | return 0; |
828 | } |
829 | |
830 | static int loongson3_enable_clock(unsigned int cpu) |
831 | { |
832 | uint64_t core_id = cpu_core(&cpu_data[cpu]); |
833 | uint64_t package_id = cpu_data[cpu].package; |
834 | |
835 | if ((read_c0_prid() & PRID_REV_MASK) == PRID_REV_LOONGSON3A_R1) { |
836 | LOONGSON_CHIPCFG(package_id) |= 1 << (12 + core_id); |
837 | } else { |
838 | if (!(loongson_sysconf.workarounds & WORKAROUND_CPUHOTPLUG)) |
839 | LOONGSON_FREQCTRL(package_id) |= 1 << (core_id * 4 + 3); |
840 | } |
841 | return 0; |
842 | } |
843 | |
844 | static int register_loongson3_notifier(void) |
845 | { |
846 | return cpuhp_setup_state_nocalls(state: CPUHP_MIPS_SOC_PREPARE, |
847 | name: "mips/loongson:prepare" , |
848 | startup: loongson3_enable_clock, |
849 | teardown: loongson3_disable_clock); |
850 | } |
851 | early_initcall(register_loongson3_notifier); |
852 | |
853 | #endif |
854 | |
/* Platform SMP hooks for Loongson-3, consumed by the MIPS SMP core */
const struct plat_smp_ops loongson3_smp_ops = {
	.send_ipi_single = loongson3_send_ipi_single,
	.send_ipi_mask = loongson3_send_ipi_mask,
	.init_secondary = loongson3_init_secondary,
	.smp_finish = loongson3_smp_finish,
	.boot_secondary = loongson3_boot_secondary,
	.smp_setup = loongson3_smp_setup,
	.prepare_cpus = loongson3_prepare_cpus,
#ifdef CONFIG_HOTPLUG_CPU
	.cpu_disable = loongson3_cpu_disable,
	.cpu_die = loongson3_cpu_die,
#endif
#ifdef CONFIG_KEXEC_CORE
	.kexec_nonboot_cpu = kexec_nonboot_cpu_jump,
#endif
};
871 | |