1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * Generate opcode table initializers for the in-kernel disassembler. |
4 | * |
5 | * Copyright IBM Corp. 2017 |
6 | * |
7 | */ |
8 | |
9 | #include <stdlib.h> |
10 | #include <string.h> |
11 | #include <ctype.h> |
12 | #include <stdio.h> |
13 | |
/*
 * Capacity in bytes, terminating NUL included, of the fixed-size string
 * buffers that hold one input field (opcode, name, format).
 */
#define STRING_SIZE_MAX 20
15 | |
/*
 * One class of instruction formats whose opcode bits beyond the first
 * byte are found at the same place within the instruction.
 */
struct insn_type {
	unsigned char byte, mask;	/* byte index holding the bits, and-mask to extract them */
	char **format;			/* NULL-terminated list of format names in this class */
};
21 | |
22 | struct insn { |
23 | struct insn_type *type; |
24 | char opcode[STRING_SIZE_MAX]; |
25 | char name[STRING_SIZE_MAX]; |
26 | char upper[STRING_SIZE_MAX]; |
27 | char format[STRING_SIZE_MAX]; |
28 | unsigned int name_len; |
29 | }; |
30 | |
/*
 * A run of consecutive opcode table entries that share the first two
 * opcode characters (or, for 1-byte opcodes, all such entries).
 */
struct insn_group {
	struct insn_type *type;	/* format class of the instruction that opened the group */
	int offset, count;	/* index of the first table entry, number of entries */
	char opcode[2];		/* first two opcode characters; not NUL-terminated */
};
37 | |
/*
 * Pairs a format name with an integer type.
 * NOTE(review): nothing in the visible part of this file references this
 * struct -- confirm whether it is still needed.
 */
struct insn_format {
	char *format;
	int type;
};
42 | |
/* Generator state: every instruction read so far and the groups built from them. */
struct gen_opcode {
	struct insn *insn;		/* heap array of instruction records */
	int nr;				/* number of valid entries in insn */
	struct insn_group *group;	/* heap array of opcode groups */
	int nr_groups;			/* number of valid entries in group */
};
49 | |
50 | /* |
51 | * Table of instruction format types. Each opcode is defined with at |
52 | * least one byte (two nibbles), three nibbles, or two bytes (four |
53 | * nibbles). |
54 | * The byte member of each instruction format type entry defines |
55 | * within which byte of an instruction the third (and fourth) nibble |
56 | * of an opcode can be found. The mask member is the and-mask that |
57 | * needs to be applied on this byte in order to get the third (and |
58 | * fourth) nibble of the opcode. |
59 | * The format array defines all instruction formats (as defined in the |
60 | * Principles of Operation) which have the same position of the opcode |
61 | * nibbles. |
62 | * A special case are instruction formats with 1-byte opcodes. In this |
63 | * case the byte member always is zero, so that the mask is applied on |
64 | * the (only) byte that contains the opcode. |
65 | */ |
66 | static struct insn_type insn_type_table[] = { |
67 | { |
68 | .byte = 0, |
69 | .mask = 0xff, |
70 | .format = (char *[]) { |
71 | "MII" , |
72 | "RR" , |
73 | "RS" , |
74 | "RSI" , |
75 | "RX" , |
76 | "SI" , |
77 | "SMI" , |
78 | "SS" , |
79 | NULL, |
80 | }, |
81 | }, |
82 | { |
83 | .byte = 1, |
84 | .mask = 0x0f, |
85 | .format = (char *[]) { |
86 | "RI" , |
87 | "RIL" , |
88 | "SSF" , |
89 | NULL, |
90 | }, |
91 | }, |
92 | { |
93 | .byte = 1, |
94 | .mask = 0xff, |
95 | .format = (char *[]) { |
96 | "E" , |
97 | "IE" , |
98 | "RRE" , |
99 | "RRF" , |
100 | "RRR" , |
101 | "S" , |
102 | "SIL" , |
103 | "SSE" , |
104 | NULL, |
105 | }, |
106 | }, |
107 | { |
108 | .byte = 5, |
109 | .mask = 0xff, |
110 | .format = (char *[]) { |
111 | "RIE" , |
112 | "RIS" , |
113 | "RRS" , |
114 | "RSE" , |
115 | "RSL" , |
116 | "RSY" , |
117 | "RXE" , |
118 | "RXF" , |
119 | "RXY" , |
120 | "SIY" , |
121 | "VRI" , |
122 | "VRR" , |
123 | "VRS" , |
124 | "VRV" , |
125 | "VRX" , |
126 | "VSI" , |
127 | NULL, |
128 | }, |
129 | }, |
130 | }; |
131 | |
132 | static struct insn_type *insn_format_to_type(char *format) |
133 | { |
134 | char tmp[STRING_SIZE_MAX]; |
135 | char *base_format, **ptr; |
136 | int i; |
137 | |
138 | strcpy(dest: tmp, src: format); |
139 | base_format = tmp; |
140 | base_format = strsep(stringp: &base_format, delim: "_" ); |
141 | for (i = 0; i < sizeof(insn_type_table) / sizeof(insn_type_table[0]); i++) { |
142 | ptr = insn_type_table[i].format; |
143 | while (*ptr) { |
144 | if (!strcmp(s1: base_format, s2: *ptr)) |
145 | return &insn_type_table[i]; |
146 | ptr++; |
147 | } |
148 | } |
149 | exit(EXIT_FAILURE); |
150 | } |
151 | |
152 | static void read_instructions(struct gen_opcode *desc) |
153 | { |
154 | struct insn insn; |
155 | int rc, i; |
156 | |
157 | while (1) { |
158 | rc = scanf(format: "%s %s %s" , insn.opcode, insn.name, insn.format); |
159 | if (rc == EOF) |
160 | break; |
161 | if (rc != 3) |
162 | exit(EXIT_FAILURE); |
163 | insn.type = insn_format_to_type(format: insn.format); |
164 | insn.name_len = strlen(s: insn.name); |
165 | for (i = 0; i <= insn.name_len; i++) |
166 | insn.upper[i] = toupper((unsigned char)insn.name[i]); |
167 | desc->nr++; |
168 | desc->insn = realloc(ptr: desc->insn, size: desc->nr * sizeof(*desc->insn)); |
169 | if (!desc->insn) |
170 | exit(EXIT_FAILURE); |
171 | desc->insn[desc->nr - 1] = insn; |
172 | } |
173 | } |
174 | |
175 | static int cmpformat(const void *a, const void *b) |
176 | { |
177 | return strcmp(s1: ((struct insn *)a)->format, s2: ((struct insn *)b)->format); |
178 | } |
179 | |
180 | static void print_formats(struct gen_opcode *desc) |
181 | { |
182 | char *format; |
183 | int i, count; |
184 | |
185 | qsort(base: desc->insn, nmemb: desc->nr, size: sizeof(*desc->insn), compar: cmpformat); |
186 | format = "" ; |
187 | count = 0; |
188 | printf(format: "enum {\n" ); |
189 | for (i = 0; i < desc->nr; i++) { |
190 | if (!strcmp(s1: format, s2: desc->insn[i].format)) |
191 | continue; |
192 | count++; |
193 | format = desc->insn[i].format; |
194 | printf(format: "\tINSTR_%s,\n" , format); |
195 | } |
196 | printf(format: "}; /* %d */\n\n" , count); |
197 | } |
198 | |
199 | static int cmp_long_insn(const void *a, const void *b) |
200 | { |
201 | return strcmp(s1: ((struct insn *)a)->name, s2: ((struct insn *)b)->name); |
202 | } |
203 | |
204 | static void print_long_insn(struct gen_opcode *desc) |
205 | { |
206 | struct insn *insn; |
207 | int i, count; |
208 | |
209 | qsort(base: desc->insn, nmemb: desc->nr, size: sizeof(*desc->insn), compar: cmp_long_insn); |
210 | count = 0; |
211 | printf(format: "enum {\n" ); |
212 | for (i = 0; i < desc->nr; i++) { |
213 | insn = &desc->insn[i]; |
214 | if (insn->name_len < 6) |
215 | continue; |
216 | printf(format: "\tLONG_INSN_%s,\n" , insn->upper); |
217 | count++; |
218 | } |
219 | printf(format: "}; /* %d */\n\n" , count); |
220 | |
221 | printf(format: "#define LONG_INSN_INITIALIZER { \\\n" ); |
222 | for (i = 0; i < desc->nr; i++) { |
223 | insn = &desc->insn[i]; |
224 | if (insn->name_len < 6) |
225 | continue; |
226 | printf(format: "\t[LONG_INSN_%s] = \"%s\", \\\n" , insn->upper, insn->name); |
227 | } |
228 | printf(format: "}\n\n" ); |
229 | } |
230 | |
231 | static void print_opcode(struct insn *insn, int nr) |
232 | { |
233 | char *opcode; |
234 | |
235 | opcode = insn->opcode; |
236 | if (insn->type->byte != 0) |
237 | opcode += 2; |
238 | printf(format: "\t[%4d] = { .opfrag = 0x%s, .format = INSTR_%s, " , nr, opcode, insn->format); |
239 | if (insn->name_len < 6) |
240 | printf(format: ".name = \"%s\" " , insn->name); |
241 | else |
242 | printf(format: ".offset = LONG_INSN_%s " , insn->upper); |
243 | printf(format: "}, \\\n" ); |
244 | } |
245 | |
246 | static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset) |
247 | { |
248 | struct insn_group *group; |
249 | |
250 | group = desc->group ? &desc->group[desc->nr_groups - 1] : NULL; |
251 | if (group && (!strncmp(s1: group->opcode, s2: insn->opcode, n: 2) || group->type->byte == 0)) { |
252 | group->count++; |
253 | return; |
254 | } |
255 | desc->nr_groups++; |
256 | desc->group = realloc(ptr: desc->group, size: desc->nr_groups * sizeof(*desc->group)); |
257 | if (!desc->group) |
258 | exit(EXIT_FAILURE); |
259 | group = &desc->group[desc->nr_groups - 1]; |
260 | memcpy(dest: group->opcode, src: insn->opcode, n: 2); |
261 | group->type = insn->type; |
262 | group->offset = offset; |
263 | group->count = 1; |
264 | } |
265 | |
266 | static int cmpopcode(const void *a, const void *b) |
267 | { |
268 | return strcmp(s1: ((struct insn *)a)->opcode, s2: ((struct insn *)b)->opcode); |
269 | } |
270 | |
271 | static void print_opcode_table(struct gen_opcode *desc) |
272 | { |
273 | char opcode[2] = "" ; |
274 | struct insn *insn; |
275 | int i, offset; |
276 | |
277 | qsort(base: desc->insn, nmemb: desc->nr, size: sizeof(*desc->insn), compar: cmpopcode); |
278 | printf(format: "#define OPCODE_TABLE_INITIALIZER { \\\n" ); |
279 | offset = 0; |
280 | for (i = 0; i < desc->nr; i++) { |
281 | insn = &desc->insn[i]; |
282 | if (insn->type->byte == 0) |
283 | continue; |
284 | add_to_group(desc, insn, offset); |
285 | if (strncmp(s1: opcode, s2: insn->opcode, n: 2)) { |
286 | memcpy(dest: opcode, src: insn->opcode, n: 2); |
287 | printf(format: "\t/* %.2s */ \\\n" , opcode); |
288 | } |
289 | print_opcode(insn, nr: offset); |
290 | offset++; |
291 | } |
292 | printf(format: "\t/* 1-byte opcode instructions */ \\\n" ); |
293 | for (i = 0; i < desc->nr; i++) { |
294 | insn = &desc->insn[i]; |
295 | if (insn->type->byte != 0) |
296 | continue; |
297 | add_to_group(desc, insn, offset); |
298 | print_opcode(insn, nr: offset); |
299 | offset++; |
300 | } |
301 | printf(format: "}\n\n" ); |
302 | } |
303 | |
304 | static void print_opcode_table_offsets(struct gen_opcode *desc) |
305 | { |
306 | struct insn_group *group; |
307 | int i; |
308 | |
309 | printf(format: "#define OPCODE_OFFSET_INITIALIZER { \\\n" ); |
310 | for (i = 0; i < desc->nr_groups; i++) { |
311 | group = &desc->group[i]; |
312 | printf(format: "\t{ .opcode = 0x%.2s, .mask = 0x%02x, .byte = %d, .offset = %d, .count = %d }, \\\n" , |
313 | group->opcode, group->type->mask, group->type->byte, group->offset, group->count); |
314 | } |
315 | printf(format: "}\n\n" ); |
316 | } |
317 | |
318 | int main(int argc, char **argv) |
319 | { |
320 | struct gen_opcode _desc = { 0 }; |
321 | struct gen_opcode *desc = &_desc; |
322 | |
323 | read_instructions(desc); |
324 | printf(format: "#ifndef __S390_GENERATED_DIS_DEFS_H__\n" ); |
325 | printf(format: "#define __S390_GENERATED_DIS_DEFS_H__\n" ); |
326 | printf(format: "/*\n" ); |
327 | printf(format: " * DO NOT MODIFY.\n" ); |
328 | printf(format: " *\n" ); |
329 | printf(format: " * This file was generated by %s\n" , __FILE__); |
330 | printf(format: " */\n\n" ); |
331 | print_formats(desc); |
332 | print_long_insn(desc); |
333 | print_opcode_table(desc); |
334 | print_opcode_table_offsets(desc); |
335 | printf(format: "#endif\n" ); |
336 | exit(EXIT_SUCCESS); |
337 | } |
338 | |