1 | /* |
2 | * This file is subject to the terms and conditions of the GNU General Public |
3 | * License. See the file "COPYING" in the main directory of this archive |
4 | * for more details. |
5 | * |
6 | * Copyright (C) 2007 by Ralf Baechle |
7 | * Copyright (C) 2009, 2012 Cavium, Inc. |
8 | */ |
9 | #include <linux/clocksource.h> |
10 | #include <linux/sched/clock.h> |
11 | #include <linux/export.h> |
12 | #include <linux/init.h> |
13 | #include <linux/smp.h> |
14 | |
15 | #include <asm/cpu-info.h> |
16 | #include <asm/cpu-type.h> |
17 | #include <asm/time.h> |
18 | |
19 | #include <asm/octeon/octeon.h> |
20 | #include <asm/octeon/cvmx-ipd-defs.h> |
21 | #include <asm/octeon/cvmx-mio-defs.h> |
22 | #include <asm/octeon/cvmx-rst-defs.h> |
23 | #include <asm/octeon/cvmx-fpa-defs.h> |
24 | |
25 | static u64 f; |
26 | static u64 rdiv; |
27 | static u64 sdiv; |
28 | static u64 octeon_udelay_factor; |
29 | static u64 octeon_ndelay_factor; |
30 | |
31 | void __init octeon_setup_delays(void) |
32 | { |
33 | octeon_udelay_factor = octeon_get_clock_rate() / 1000000; |
34 | /* |
35 | * For __ndelay we divide by 2^16, so the factor is multiplied |
36 | * by the same amount. |
37 | */ |
38 | octeon_ndelay_factor = (octeon_udelay_factor * 0x10000ull) / 1000ull; |
39 | |
40 | preset_lpj = octeon_get_clock_rate() / HZ; |
41 | |
42 | if (current_cpu_type() == CPU_CAVIUM_OCTEON2) { |
43 | union cvmx_mio_rst_boot rst_boot; |
44 | |
45 | rst_boot.u64 = cvmx_read_csr(CVMX_MIO_RST_BOOT); |
46 | rdiv = rst_boot.s.c_mul; /* CPU clock */ |
47 | sdiv = rst_boot.s.pnr_mul; /* I/O clock */ |
48 | f = (0x8000000000000000ull / sdiv) * 2; |
49 | } else if (current_cpu_type() == CPU_CAVIUM_OCTEON3) { |
50 | union cvmx_rst_boot rst_boot; |
51 | |
52 | rst_boot.u64 = cvmx_read_csr(CVMX_RST_BOOT); |
53 | rdiv = rst_boot.s.c_mul; /* CPU clock */ |
54 | sdiv = rst_boot.s.pnr_mul; /* I/O clock */ |
55 | f = (0x8000000000000000ull / sdiv) * 2; |
56 | } |
57 | |
58 | } |
59 | |
60 | /* |
61 | * Set the current core's cvmcount counter to the value of the |
62 | * IPD_CLK_COUNT. We do this on all cores as they are brought |
63 | * on-line. This allows for a read from a local cpu register to |
64 | * access a synchronized counter. |
65 | * |
66 | * On CPU_CAVIUM_OCTEON2 the IPD_CLK_COUNT is scaled by rdiv/sdiv. |
67 | */ |
68 | void octeon_init_cvmcount(void) |
69 | { |
70 | u64 clk_reg; |
71 | unsigned long flags; |
72 | unsigned loops = 2; |
73 | |
74 | clk_reg = octeon_has_feature(OCTEON_FEATURE_FPA3) ? |
75 | CVMX_FPA_CLK_COUNT : CVMX_IPD_CLK_COUNT; |
76 | |
77 | /* Clobber loops so GCC will not unroll the following while loop. */ |
78 | asm("" : "+r" (loops)); |
79 | |
80 | local_irq_save(flags); |
81 | /* |
82 | * Loop several times so we are executing from the cache, |
83 | * which should give more deterministic timing. |
84 | */ |
85 | while (loops--) { |
86 | u64 clk_count = cvmx_read_csr(clk_reg); |
87 | if (rdiv != 0) { |
88 | clk_count *= rdiv; |
89 | if (f != 0) { |
90 | asm("dmultu\t%[cnt],%[f]\n\t" |
91 | "mfhi\t%[cnt]" |
92 | : [cnt] "+r" (clk_count) |
93 | : [f] "r" (f) |
94 | : "hi" , "lo" ); |
95 | } |
96 | } |
97 | write_c0_cvmcount(clk_count); |
98 | } |
99 | local_irq_restore(flags); |
100 | } |
101 | |
102 | static u64 octeon_cvmcount_read(struct clocksource *cs) |
103 | { |
104 | return read_c0_cvmcount(); |
105 | } |
106 | |
107 | static struct clocksource clocksource_mips = { |
108 | .name = "OCTEON_CVMCOUNT" , |
109 | .read = octeon_cvmcount_read, |
110 | .mask = CLOCKSOURCE_MASK(64), |
111 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
112 | }; |
113 | |
114 | unsigned long long notrace sched_clock(void) |
115 | { |
116 | /* 64-bit arithmetic can overflow, so use 128-bit. */ |
117 | u64 t1, t2, t3; |
118 | unsigned long long rv; |
119 | u64 mult = clocksource_mips.mult; |
120 | u64 shift = clocksource_mips.shift; |
121 | u64 cnt = read_c0_cvmcount(); |
122 | |
123 | asm ( |
124 | "dmultu\t%[cnt],%[mult]\n\t" |
125 | "nor\t%[t1],$0,%[shift]\n\t" |
126 | "mfhi\t%[t2]\n\t" |
127 | "mflo\t%[t3]\n\t" |
128 | "dsll\t%[t2],%[t2],1\n\t" |
129 | "dsrlv\t%[rv],%[t3],%[shift]\n\t" |
130 | "dsllv\t%[t1],%[t2],%[t1]\n\t" |
131 | "or\t%[rv],%[t1],%[rv]\n\t" |
132 | : [rv] "=&r" (rv), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3) |
133 | : [cnt] "r" (cnt), [mult] "r" (mult), [shift] "r" (shift) |
134 | : "hi" , "lo" ); |
135 | return rv; |
136 | } |
137 | |
138 | void __init plat_time_init(void) |
139 | { |
140 | clocksource_mips.rating = 300; |
141 | clocksource_register_hz(cs: &clocksource_mips, hz: octeon_get_clock_rate()); |
142 | } |
143 | |
144 | void __udelay(unsigned long us) |
145 | { |
146 | u64 cur, end, inc; |
147 | |
148 | cur = read_c0_cvmcount(); |
149 | |
150 | inc = us * octeon_udelay_factor; |
151 | end = cur + inc; |
152 | |
153 | while (end > cur) |
154 | cur = read_c0_cvmcount(); |
155 | } |
156 | EXPORT_SYMBOL(__udelay); |
157 | |
158 | void __ndelay(unsigned long ns) |
159 | { |
160 | u64 cur, end, inc; |
161 | |
162 | cur = read_c0_cvmcount(); |
163 | |
164 | inc = ((ns * octeon_ndelay_factor) >> 16); |
165 | end = cur + inc; |
166 | |
167 | while (end > cur) |
168 | cur = read_c0_cvmcount(); |
169 | } |
170 | EXPORT_SYMBOL(__ndelay); |
171 | |
172 | void __delay(unsigned long loops) |
173 | { |
174 | u64 cur, end; |
175 | |
176 | cur = read_c0_cvmcount(); |
177 | end = cur + loops; |
178 | |
179 | while (end > cur) |
180 | cur = read_c0_cvmcount(); |
181 | } |
182 | EXPORT_SYMBOL(__delay); |
183 | |
184 | |
185 | /** |
186 | * octeon_io_clk_delay - wait for a given number of io clock cycles to pass. |
187 | * |
188 | * We scale the wait by the clock ratio, and then wait for the |
189 | * corresponding number of core clocks. |
190 | * |
191 | * @count: The number of clocks to wait. |
192 | */ |
193 | void octeon_io_clk_delay(unsigned long count) |
194 | { |
195 | u64 cur, end; |
196 | |
197 | cur = read_c0_cvmcount(); |
198 | if (rdiv != 0) { |
199 | end = count * rdiv; |
200 | if (f != 0) { |
201 | asm("dmultu\t%[cnt],%[f]\n\t" |
202 | "mfhi\t%[cnt]" |
203 | : [cnt] "+r" (end) |
204 | : [f] "r" (f) |
205 | : "hi" , "lo" ); |
206 | } |
207 | end = cur + end; |
208 | } else { |
209 | end = cur + count; |
210 | } |
211 | while (end > cur) |
212 | cur = read_c0_cvmcount(); |
213 | } |
214 | EXPORT_SYMBOL(octeon_io_clk_delay); |
215 | |