1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * FPU data structures: |
4 | */ |
5 | #ifndef _ASM_X86_FPU_H |
6 | #define _ASM_X86_FPU_H |
7 | |
8 | /* |
9 | * The legacy x87 FPU state format, as saved by FSAVE and |
10 | * restored by the FRSTOR instructions: |
11 | */ |
12 | struct fregs_state { |
13 | u32 cwd; /* FPU Control Word */ |
14 | u32 swd; /* FPU Status Word */ |
15 | u32 twd; /* FPU Tag Word */ |
16 | u32 fip; /* FPU IP Offset */ |
17 | u32 fcs; /* FPU IP Selector */ |
18 | u32 foo; /* FPU Operand Pointer Offset */ |
19 | u32 fos; /* FPU Operand Pointer Selector */ |
20 | |
21 | /* 8*10 bytes for each FP-reg = 80 bytes: */ |
22 | u32 st_space[20]; |
23 | |
24 | /* Software status information [not touched by FSAVE]: */ |
25 | u32 status; |
26 | }; |
27 | |
28 | /* |
29 | * The legacy fx SSE/MMX FPU state format, as saved by FXSAVE and |
30 | * restored by the FXRSTOR instructions. It's similar to the FSAVE |
31 | * format, but differs in some areas, plus has extensions at |
32 | * the end for the XMM registers. |
33 | */ |
34 | struct fxregs_state { |
35 | u16 cwd; /* Control Word */ |
36 | u16 swd; /* Status Word */ |
37 | u16 twd; /* Tag Word */ |
38 | u16 fop; /* Last Instruction Opcode */ |
39 | union { |
40 | struct { |
41 | u64 rip; /* Instruction Pointer */ |
42 | u64 rdp; /* Data Pointer */ |
43 | }; |
44 | struct { |
45 | u32 fip; /* FPU IP Offset */ |
46 | u32 fcs; /* FPU IP Selector */ |
47 | u32 foo; /* FPU Operand Offset */ |
48 | u32 fos; /* FPU Operand Selector */ |
49 | }; |
50 | }; |
51 | u32 mxcsr; /* MXCSR Register State */ |
52 | u32 mxcsr_mask; /* MXCSR Mask */ |
53 | |
54 | /* 8*16 bytes for each FP-reg = 128 bytes: */ |
55 | u32 st_space[32]; |
56 | |
57 | /* 16*16 bytes for each XMM-reg = 256 bytes: */ |
58 | u32 xmm_space[64]; |
59 | |
60 | u32 padding[12]; |
61 | |
62 | union { |
63 | u32 padding1[12]; |
64 | u32 sw_reserved[12]; |
65 | }; |
66 | |
67 | } __attribute__((aligned(16))); |
68 | |
/* Default value for the 'mxcsr' member of struct fxregs_state: */
#define MXCSR_DEFAULT		0x1f80

/*
 * 'mxcsr' and 'mxcsr_mask' are adjacent u32 members of struct
 * fxregs_state, so both can be copied with a single u64-sized memcpy:
 */
#define MXCSR_AND_FLAGS_SIZE	sizeof(u64)
74 | |
75 | /* |
76 | * Software based FPU emulation state. This is arbitrary really, |
77 | * it matches the x87 format to make it easier to understand: |
78 | */ |
79 | struct swregs_state { |
80 | u32 cwd; |
81 | u32 swd; |
82 | u32 twd; |
83 | u32 fip; |
84 | u32 fcs; |
85 | u32 foo; |
86 | u32 fos; |
87 | /* 8*10 bytes for each FP-reg = 80 bytes: */ |
88 | u32 st_space[20]; |
89 | u8 ftop; |
90 | u8 changed; |
91 | u8 lookahead; |
92 | u8 no_update; |
93 | u8 rm; |
94 | u8 alimit; |
95 | struct math_emu_info *info; |
96 | u32 entry_eip; |
97 | }; |
98 | |
/*
 * List of XSAVE features Linux knows about.
 *
 * Each enumerator's value is the bit position used by the matching
 * XFEATURE_MASK_* definition.
 */
enum xfeature {
	/* "Legacy states": */
	XFEATURE_FP				= 0,
	XFEATURE_SSE				= 1,

	/* "Extended states": */
	XFEATURE_YMM				= 2,
	XFEATURE_BNDREGS			= 3,
	XFEATURE_BNDCSR				= 4,
	XFEATURE_OPMASK				= 5,
	XFEATURE_ZMM_Hi256			= 6,
	XFEATURE_Hi16_ZMM			= 7,
	XFEATURE_PT_UNIMPLEMENTED_SO_FAR	= 8,
	XFEATURE_PKRU				= 9,

	/* Not a feature: one past the last known feature. */
	XFEATURE_MAX,
};
120 | |
/* Single-bit masks: bit N is set for the enum xfeature member of value N. */
#define XFEATURE_MASK_FP		(1 << XFEATURE_FP)
#define XFEATURE_MASK_SSE		(1 << XFEATURE_SSE)
#define XFEATURE_MASK_YMM		(1 << XFEATURE_YMM)
#define XFEATURE_MASK_BNDREGS		(1 << XFEATURE_BNDREGS)
#define XFEATURE_MASK_BNDCSR		(1 << XFEATURE_BNDCSR)
#define XFEATURE_MASK_OPMASK		(1 << XFEATURE_OPMASK)
#define XFEATURE_MASK_ZMM_Hi256		(1 << XFEATURE_ZMM_Hi256)
#define XFEATURE_MASK_Hi16_ZMM		(1 << XFEATURE_Hi16_ZMM)
#define XFEATURE_MASK_PT		(1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
#define XFEATURE_MASK_PKRU		(1 << XFEATURE_PKRU)

/* Combined masks built from the single-bit masks above: */
#define XFEATURE_MASK_FPSSE		(XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
#define XFEATURE_MASK_AVX512		(XFEATURE_MASK_OPMASK | \
					 XFEATURE_MASK_ZMM_Hi256 | \
					 XFEATURE_MASK_Hi16_ZMM)

/* The first xfeature that is an "extended state", not a "legacy state": */
#define FIRST_EXTENDED_XFEATURE		XFEATURE_YMM
138 | |
139 | struct reg_128_bit { |
140 | u8 regbytes[128/8]; |
141 | }; |
142 | struct reg_256_bit { |
143 | u8 regbytes[256/8]; |
144 | }; |
145 | struct reg_512_bit { |
146 | u8 regbytes[512/8]; |
147 | }; |
148 | |
149 | /* |
150 | * State component 2: |
151 | * |
152 | * There are 16x 256-bit AVX registers named YMM0-YMM15. |
153 | * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15) |
154 | * and are stored in 'struct fxregs_state::xmm_space[]' in the |
155 | * "legacy" area. |
156 | * |
157 | * The high 128 bits are stored here. |
158 | */ |
159 | struct ymmh_struct { |
160 | struct reg_128_bit hi_ymm[16]; |
161 | } __packed; |
162 | |
163 | /* Intel MPX support: */ |
164 | |
165 | struct mpx_bndreg { |
166 | u64 lower_bound; |
167 | u64 upper_bound; |
168 | } __packed; |
169 | /* |
170 | * State component 3 is used for the 4 128-bit bounds registers |
171 | */ |
172 | struct mpx_bndreg_state { |
173 | struct mpx_bndreg bndreg[4]; |
174 | } __packed; |
175 | |
176 | /* |
177 | * State component 4 is used for the 64-bit user-mode MPX |
178 | * configuration register BNDCFGU and the 64-bit MPX status |
179 | * register BNDSTATUS. We call the pair "BNDCSR". |
180 | */ |
181 | struct mpx_bndcsr { |
182 | u64 bndcfgu; |
183 | u64 bndstatus; |
184 | } __packed; |
185 | |
186 | /* |
187 | * The BNDCSR state is padded out to be 64-bytes in size. |
188 | */ |
189 | struct mpx_bndcsr_state { |
190 | union { |
191 | struct mpx_bndcsr bndcsr; |
192 | u8 pad_to_64_bytes[64]; |
193 | }; |
194 | } __packed; |
195 | |
196 | /* AVX-512 Components: */ |
197 | |
198 | /* |
199 | * State component 5 is used for the 8 64-bit opmask registers |
200 | * k0-k7 (opmask state). |
201 | */ |
202 | struct avx_512_opmask_state { |
203 | u64 opmask_reg[8]; |
204 | } __packed; |
205 | |
206 | /* |
207 | * State component 6 is used for the upper 256 bits of the |
208 | * registers ZMM0-ZMM15. These 16 256-bit values are denoted |
209 | * ZMM0_H-ZMM15_H (ZMM_Hi256 state). |
210 | */ |
211 | struct avx_512_zmm_uppers_state { |
212 | struct reg_256_bit zmm_upper[16]; |
213 | } __packed; |
214 | |
215 | /* |
216 | * State component 7 is used for the 16 512-bit registers |
217 | * ZMM16-ZMM31 (Hi16_ZMM state). |
218 | */ |
219 | struct avx_512_hi16_state { |
220 | struct reg_512_bit hi16_zmm[16]; |
221 | } __packed; |
222 | |
223 | /* |
224 | * State component 9: 32-bit PKRU register. The state is |
225 | * 8 bytes long but only 4 bytes is used currently. |
226 | */ |
227 | struct pkru_state { |
228 | u32 pkru; |
229 | u32 pad; |
230 | } __packed; |
231 | |
232 | struct { |
233 | u64 ; |
234 | u64 ; |
235 | u64 [6]; |
236 | } __attribute__((packed)); |
237 | |
/*
 * Bit 63 of xstate_header.xcomp_bv: when set, the extended_state_area
 * is in compacted format.
 */
#define XCOMP_BV_COMPACTED_FORMAT	((u64)1 << 63)
243 | |
244 | /* |
245 | * This is our most modern FPU state format, as saved by the XSAVE |
246 | * and restored by the XRSTOR instructions. |
247 | * |
248 | * It consists of a legacy fxregs portion, an xstate header and |
249 | * subsequent areas as defined by the xstate header. Not all CPUs |
250 | * support all the extensions, so the size of the extended area |
251 | * can vary quite a bit between CPUs. |
252 | */ |
253 | struct xregs_state { |
254 | struct fxregs_state i387; |
255 | struct xstate_header ; |
256 | u8 extended_state_area[0]; |
257 | } __attribute__ ((packed, aligned (64))); |
258 | |
259 | /* |
260 | * This is a union of all the possible FPU state formats |
261 | * put together, so that we can pick the right one runtime. |
262 | * |
263 | * The size of the structure is determined by the largest |
264 | * member - which is the xsave area. The padding is there |
265 | * to ensure that statically-allocated task_structs (just |
266 | * the init_task today) have enough space. |
267 | */ |
268 | union fpregs_state { |
269 | struct fregs_state fsave; |
270 | struct fxregs_state fxsave; |
271 | struct swregs_state soft; |
272 | struct xregs_state xsave; |
273 | u8 __padding[PAGE_SIZE]; |
274 | }; |
275 | |
276 | /* |
277 | * Highest level per task FPU state data structure that |
278 | * contains the FPU register state plus various FPU |
279 | * state fields: |
280 | */ |
281 | struct fpu { |
282 | /* |
283 | * @last_cpu: |
284 | * |
285 | * Records the last CPU on which this context was loaded into |
286 | * FPU registers. (In the lazy-restore case we might be |
287 | * able to reuse FPU registers across multiple context switches |
288 | * this way, if no intermediate task used the FPU.) |
289 | * |
290 | * A value of -1 is used to indicate that the FPU state in context |
291 | * memory is newer than the FPU state in registers, and that the |
292 | * FPU state should be reloaded next time the task is run. |
293 | */ |
294 | unsigned int last_cpu; |
295 | |
296 | /* |
297 | * @initialized: |
298 | * |
299 | * This flag indicates whether this context is initialized: if the task |
300 | * is not running then we can restore from this context, if the task |
301 | * is running then we should save into this context. |
302 | */ |
303 | unsigned char initialized; |
304 | |
305 | /* |
306 | * @avx512_timestamp: |
307 | * |
308 | * Records the timestamp of AVX512 use during last context switch. |
309 | */ |
310 | unsigned long avx512_timestamp; |
311 | |
312 | /* |
313 | * @state: |
314 | * |
315 | * In-memory copy of all FPU registers that we save/restore |
316 | * over context switches. If the task is using the FPU then |
317 | * the registers in the FPU are more recent than this state |
318 | * copy. If the task context-switches away then they get |
319 | * saved here and represent the FPU state. |
320 | */ |
321 | union fpregs_state state; |
322 | /* |
323 | * WARNING: 'state' is dynamically-sized. Do not put |
324 | * anything after it here. |
325 | */ |
326 | }; |
327 | |
328 | #endif /* _ASM_X86_FPU_H */ |
329 | |