1 | // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) |
2 | /* |
3 | * Routines for dealing with .zip archives. |
4 | * |
5 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
6 | */ |
7 | |
8 | #include <errno.h> |
9 | #include <fcntl.h> |
10 | #include <stdint.h> |
11 | #include <stdlib.h> |
12 | #include <string.h> |
13 | #include <sys/mman.h> |
14 | #include <unistd.h> |
15 | |
16 | #include "libbpf_internal.h" |
17 | #include "zip.h" |
18 | |
19 | #pragma GCC diagnostic push |
20 | #pragma GCC diagnostic ignored "-Wpacked" |
21 | #pragma GCC diagnostic ignored "-Wattributes" |
22 | |
23 | /* Specification of ZIP file format can be found here: |
24 | * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT |
25 | * For a high level overview of the structure of a ZIP file see |
26 | * sections 4.3.1 - 4.3.6. |
27 | * |
28 | * Data structures appearing in ZIP files do not contain any |
29 | * padding and they might be misaligned. To allow us to safely |
30 | * operate on pointers to such structures and their members, we |
31 | * declare the types as packed. |
32 | */ |
33 | |
#define END_OF_CD_RECORD_MAGIC 0x06054b50

/* End of central directory (EOCD) record as laid out in the archive.
 * See section 4.3.16 of the spec.
 *
 * The structure is packed so that its size and member offsets match the
 * on-disk layout exactly (22 bytes, not counting the trailing comment).
 */
struct end_of_cd_record {
	/* Magic value equal to END_OF_CD_RECORD_MAGIC */
	__u32 magic;

	/* Number of the file containing this structure or 0xFFFF if ZIP64 archive.
	 * Zip archive might span multiple files (disks).
	 */
	__u16 this_disk;

	/* Number of the file containing the beginning of the central directory or
	 * 0xFFFF if ZIP64 archive.
	 */
	__u16 cd_disk;

	/* Number of central directory records on this disk or 0xFFFF if ZIP64
	 * archive.
	 */
	__u16 cd_records;

	/* Number of central directory records on all disks or 0xFFFF if ZIP64
	 * archive.
	 */
	__u16 cd_records_total;

	/* Size of the central directory record or 0xFFFFFFFF if ZIP64 archive. */
	__u32 cd_size;

	/* Offset of the central directory from the beginning of the archive or
	 * 0xFFFFFFFF if ZIP64 archive.
	 */
	__u32 cd_offset;

	/* Length of comment data following end of central directory record. */
	__u16 comment_length;

	/* Up to 64k of arbitrary bytes. */
	/* uint8_t comment[comment_length] */
} __attribute__((packed));
75 | |
#define CD_FILE_HEADER_MAGIC 0x02014b50
/* Bits of the general purpose flags field that this file tests for. */
#define FLAG_ENCRYPTED (1 << 0)
#define FLAG_HAS_DATA_DESCRIPTOR (1 << 3)

/* Central directory file header as laid out in the archive.
 * See section 4.3.12 of the spec.
 *
 * Members appear in the order given by the spec; the structure is packed so
 * that member offsets match the on-disk layout (46 bytes of fixed fields).
 */
struct cd_file_header {
	/* Magic value equal to CD_FILE_HEADER_MAGIC. */
	__u32 magic;
	/* "Version made by" field of the spec. */
	__u16 version;
	/* Minimum zip version needed to extract the file. */
	__u16 min_version;
	/* General purpose bit flags, see FLAG_* above. */
	__u16 flags;
	/* Compression method. */
	__u16 compression;
	__u16 last_modified_time;
	__u16 last_modified_date;
	__u32 crc;
	__u32 compressed_size;
	__u32 uncompressed_size;
	/* Lengths of the variable-size fields that follow this header, in the
	 * order file name, extra field, file comment.
	 */
	__u16 file_name_length;
	__u16 extra_field_length;
	__u16 file_comment_length;
	/* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */
	__u16 disk;
	__u16 internal_attributes;
	__u32 external_attributes;
	/* Offset from the start of the disk containing the local file header to the
	 * start of the local file header.
	 */
	__u32 offset;
} __attribute__((packed));
106 | |
#define LOCAL_FILE_HEADER_MAGIC 0x04034b50

/* Local file header as laid out in the archive, immediately followed by the
 * file name, the extra field and then the file data.
 * See section 4.3.7 of the spec.
 *
 * Members appear in the order given by the spec; the structure is packed so
 * that member offsets match the on-disk layout (30 bytes of fixed fields).
 */
struct local_file_header {
	/* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */
	__u32 magic;
	/* Minimum zip version needed to extract the file. */
	__u16 min_version;
	/* General purpose bit flags, see FLAG_ENCRYPTED and
	 * FLAG_HAS_DATA_DESCRIPTOR.
	 */
	__u16 flags;
	/* Compression method. */
	__u16 compression;
	__u16 last_modified_time;
	__u16 last_modified_date;
	__u32 crc;
	__u32 compressed_size;
	__u32 uncompressed_size;
	/* Lengths of the file name and extra field that follow this header. */
	__u16 file_name_length;
	__u16 extra_field_length;
} __attribute__((packed));
125 | |
126 | #pragma GCC diagnostic pop |
127 | |
/* Handle describing one opened archive. */
struct zip_archive {
	/* Start of the archive contents; zip_archive_open() stores the address
	 * returned by mmap() here.
	 */
	void *data;

	/* Number of bytes available at data. */
	__u32 size;

	/* Offset of the central directory within data, recorded by
	 * try_parse_end_of_cd().
	 */
	__u32 cd_offset;

	/* Number of central directory records, recorded by
	 * try_parse_end_of_cd().
	 */
	__u32 cd_records;
};
134 | |
135 | static void *check_access(struct zip_archive *archive, __u32 offset, __u32 size) |
136 | { |
137 | if (offset + size > archive->size || offset > offset + size) |
138 | return NULL; |
139 | |
140 | return archive->data + offset; |
141 | } |
142 | |
143 | /* Returns 0 on success, -EINVAL on error and -ENOTSUP if the eocd indicates the |
144 | * archive uses features which are not supported. |
145 | */ |
146 | static int try_parse_end_of_cd(struct zip_archive *archive, __u32 offset) |
147 | { |
148 | __u16 , cd_records; |
149 | struct end_of_cd_record *eocd; |
150 | __u32 cd_offset, cd_size; |
151 | |
152 | eocd = check_access(archive, offset, size: sizeof(*eocd)); |
153 | if (!eocd || eocd->magic != END_OF_CD_RECORD_MAGIC) |
154 | return -EINVAL; |
155 | |
156 | comment_length = eocd->comment_length; |
157 | if (offset + sizeof(*eocd) + comment_length != archive->size) |
158 | return -EINVAL; |
159 | |
160 | cd_records = eocd->cd_records; |
161 | if (eocd->this_disk != 0 || eocd->cd_disk != 0 || eocd->cd_records_total != cd_records) |
162 | /* This is a valid eocd, but we only support single-file non-ZIP64 archives. */ |
163 | return -ENOTSUP; |
164 | |
165 | cd_offset = eocd->cd_offset; |
166 | cd_size = eocd->cd_size; |
167 | if (!check_access(archive, offset: cd_offset, size: cd_size)) |
168 | return -EINVAL; |
169 | |
170 | archive->cd_offset = cd_offset; |
171 | archive->cd_records = cd_records; |
172 | return 0; |
173 | } |
174 | |
175 | static int find_cd(struct zip_archive *archive) |
176 | { |
177 | int64_t limit, offset; |
178 | int rc = -EINVAL; |
179 | |
180 | if (archive->size <= sizeof(struct end_of_cd_record)) |
181 | return -EINVAL; |
182 | |
183 | /* Because the end of central directory ends with a variable length array of |
184 | * up to 0xFFFF bytes we can't know exactly where it starts and need to |
185 | * search for it at the end of the file, scanning the (limit, offset] range. |
186 | */ |
187 | offset = archive->size - sizeof(struct end_of_cd_record); |
188 | limit = (int64_t)offset - (1 << 16); |
189 | |
190 | for (; offset >= 0 && offset > limit && rc != 0; offset--) { |
191 | rc = try_parse_end_of_cd(archive, offset); |
192 | if (rc == -ENOTSUP) |
193 | break; |
194 | } |
195 | return rc; |
196 | } |
197 | |
198 | struct zip_archive *zip_archive_open(const char *path) |
199 | { |
200 | struct zip_archive *archive; |
201 | int err, fd; |
202 | off_t size; |
203 | void *data; |
204 | |
205 | fd = open(path, O_RDONLY | O_CLOEXEC); |
206 | if (fd < 0) |
207 | return ERR_PTR(error: -errno); |
208 | |
209 | size = lseek(fd, 0, SEEK_END); |
210 | if (size == (off_t)-1 || size > UINT32_MAX) { |
211 | close(fd); |
212 | return ERR_PTR(error: -EINVAL); |
213 | } |
214 | |
215 | data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); |
216 | err = -errno; |
217 | close(fd); |
218 | |
219 | if (data == MAP_FAILED) |
220 | return ERR_PTR(error: err); |
221 | |
222 | archive = malloc(sizeof(*archive)); |
223 | if (!archive) { |
224 | munmap(data, size); |
225 | return ERR_PTR(error: -ENOMEM); |
226 | }; |
227 | |
228 | archive->data = data; |
229 | archive->size = size; |
230 | |
231 | err = find_cd(archive); |
232 | if (err) { |
233 | munmap(data, size); |
234 | free(archive); |
235 | return ERR_PTR(error: err); |
236 | } |
237 | |
238 | return archive; |
239 | } |
240 | |
241 | void zip_archive_close(struct zip_archive *archive) |
242 | { |
243 | munmap(archive->data, archive->size); |
244 | free(archive); |
245 | } |
246 | |
247 | static struct local_file_header *(struct zip_archive *archive, |
248 | __u32 offset) |
249 | { |
250 | struct local_file_header *lfh; |
251 | |
252 | lfh = check_access(archive, offset, size: sizeof(*lfh)); |
253 | if (!lfh || lfh->magic != LOCAL_FILE_HEADER_MAGIC) |
254 | return NULL; |
255 | |
256 | return lfh; |
257 | } |
258 | |
259 | static int get_entry_at_offset(struct zip_archive *archive, __u32 offset, struct zip_entry *out) |
260 | { |
261 | struct local_file_header *lfh; |
262 | __u32 compressed_size; |
263 | const char *name; |
264 | void *data; |
265 | |
266 | lfh = local_file_header_at_offset(archive, offset); |
267 | if (!lfh) |
268 | return -EINVAL; |
269 | |
270 | offset += sizeof(*lfh); |
271 | if ((lfh->flags & FLAG_ENCRYPTED) || (lfh->flags & FLAG_HAS_DATA_DESCRIPTOR)) |
272 | return -EINVAL; |
273 | |
274 | name = check_access(archive, offset, size: lfh->file_name_length); |
275 | if (!name) |
276 | return -EINVAL; |
277 | |
278 | offset += lfh->file_name_length; |
279 | if (!check_access(archive, offset, size: lfh->extra_field_length)) |
280 | return -EINVAL; |
281 | |
282 | offset += lfh->extra_field_length; |
283 | compressed_size = lfh->compressed_size; |
284 | data = check_access(archive, offset, size: compressed_size); |
285 | if (!data) |
286 | return -EINVAL; |
287 | |
288 | out->compression = lfh->compression; |
289 | out->name_length = lfh->file_name_length; |
290 | out->name = name; |
291 | out->data = data; |
292 | out->data_length = compressed_size; |
293 | out->data_offset = offset; |
294 | |
295 | return 0; |
296 | } |
297 | |
298 | int zip_archive_find_entry(struct zip_archive *archive, const char *file_name, |
299 | struct zip_entry *out) |
300 | { |
301 | size_t file_name_length = strlen(file_name); |
302 | __u32 i, offset = archive->cd_offset; |
303 | |
304 | for (i = 0; i < archive->cd_records; ++i) { |
305 | __u16 cdfh_name_length, cdfh_flags; |
306 | struct cd_file_header *cdfh; |
307 | const char *cdfh_name; |
308 | |
309 | cdfh = check_access(archive, offset, size: sizeof(*cdfh)); |
310 | if (!cdfh || cdfh->magic != CD_FILE_HEADER_MAGIC) |
311 | return -EINVAL; |
312 | |
313 | offset += sizeof(*cdfh); |
314 | cdfh_name_length = cdfh->file_name_length; |
315 | cdfh_name = check_access(archive, offset, size: cdfh_name_length); |
316 | if (!cdfh_name) |
317 | return -EINVAL; |
318 | |
319 | cdfh_flags = cdfh->flags; |
320 | if ((cdfh_flags & FLAG_ENCRYPTED) == 0 && |
321 | (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 && |
322 | file_name_length == cdfh_name_length && |
323 | memcmp(p: file_name, q: archive->data + offset, size: file_name_length) == 0) { |
324 | return get_entry_at_offset(archive, offset: cdfh->offset, out); |
325 | } |
326 | |
327 | offset += cdfh_name_length; |
328 | offset += cdfh->extra_field_length; |
329 | offset += cdfh->file_comment_length; |
330 | } |
331 | |
332 | return -ENOENT; |
333 | } |
334 | |