1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtCore module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | |
40 | |
41 | #define QT_NO_CAST_FROM_ASCII |
42 | |
43 | #include "qmimemagicrule_p.h" |
44 | |
45 | #include "qmimetypeparser_p.h" |
46 | #include <QtCore/QList> |
47 | #include <QtCore/QDebug> |
48 | #include <qendian.h> |
49 | |
50 | QT_BEGIN_NAMESPACE |
51 | |
// Names of the magic rule types, packed into a single NUL-separated string.
// The entries are in the same order as Type! A Type value is used as an index
// into magicRuleTypes_indices, which in turn gives the offset of the name here.
static const char magicRuleTypes_string[] =
    "invalid\0"
    "string\0"
    "host16\0"
    "host32\0"
    "big16\0"
    "big32\0"
    "little16\0"
    "little32\0"
    "byte\0"
    "\0" ;

// Byte offset of each name inside magicRuleTypes_string.
static const int magicRuleTypes_indices[] = {
    0,      // "invalid"
    8,      // "string"
    15,     // "host16"
    22,     // "host32"
    29,     // "big16"
    35,     // "big32"
    41,     // "little16"
    50,     // "little32"
    59,     // "byte"
    64,     // the empty string that closes the table
    0       // trailing entry
};
68 | |
69 | QMimeMagicRule::Type QMimeMagicRule::type(const QByteArray &theTypeName) |
70 | { |
71 | for (int i = String; i <= Byte; ++i) { |
72 | if (theTypeName == magicRuleTypes_string + magicRuleTypes_indices[i]) |
73 | return Type(i); |
74 | } |
75 | return Invalid; |
76 | } |
77 | |
78 | QByteArray QMimeMagicRule::typeName(QMimeMagicRule::Type theType) |
79 | { |
80 | return magicRuleTypes_string + magicRuleTypes_indices[theType]; |
81 | } |
82 | |
83 | bool QMimeMagicRule::operator==(const QMimeMagicRule &other) const |
84 | { |
85 | return m_type == other.m_type && |
86 | m_value == other.m_value && |
87 | m_startPos == other.m_startPos && |
88 | m_endPos == other.m_endPos && |
89 | m_mask == other.m_mask && |
90 | m_pattern == other.m_pattern && |
91 | m_number == other.m_number && |
92 | m_numberMask == other.m_numberMask && |
93 | m_matchFunction == other.m_matchFunction; |
94 | } |
95 | |
96 | // Used by both providers |
97 | bool QMimeMagicRule::matchSubstring(const char *dataPtr, int dataSize, int rangeStart, int rangeLength, |
98 | int valueLength, const char *valueData, const char *mask) |
99 | { |
100 | // Size of searched data. |
101 | // Example: value="ABC", rangeLength=3 -> we need 3+3-1=5 bytes (ABCxx,xABCx,xxABC would match) |
102 | const int dataNeeded = qMin(a: rangeLength + valueLength - 1, b: dataSize - rangeStart); |
103 | |
104 | if (!mask) { |
105 | // callgrind says QByteArray::indexOf is much slower, since our strings are typically too |
106 | // short for be worth Boyer-Moore matching (1 to 71 bytes, 11 bytes on average). |
107 | bool found = false; |
108 | for (int i = rangeStart; i < rangeStart + rangeLength; ++i) { |
109 | if (i + valueLength > dataSize) |
110 | break; |
111 | |
112 | if (memcmp(s1: valueData, s2: dataPtr + i, n: valueLength) == 0) { |
113 | found = true; |
114 | break; |
115 | } |
116 | } |
117 | if (!found) |
118 | return false; |
119 | } else { |
120 | bool found = false; |
121 | const char *readDataBase = dataPtr + rangeStart; |
122 | // Example (continued from above): |
123 | // deviceSize is 4, so dataNeeded was max'ed to 4. |
124 | // maxStartPos = 4 - 3 + 1 = 2, and indeed |
125 | // we need to check for a match a positions 0 and 1 (ABCx and xABC). |
126 | const int maxStartPos = dataNeeded - valueLength + 1; |
127 | for (int i = 0; i < maxStartPos; ++i) { |
128 | const char *d = readDataBase + i; |
129 | bool valid = true; |
130 | for (int idx = 0; idx < valueLength; ++idx) { |
131 | if (((*d++) & mask[idx]) != (valueData[idx] & mask[idx])) { |
132 | valid = false; |
133 | break; |
134 | } |
135 | } |
136 | if (valid) |
137 | found = true; |
138 | } |
139 | if (!found) |
140 | return false; |
141 | } |
142 | //qDebug() << "Found" << value << "in" << searchedData; |
143 | return true; |
144 | } |
145 | |
146 | bool QMimeMagicRule::matchString(const QByteArray &data) const |
147 | { |
148 | const int rangeLength = m_endPos - m_startPos + 1; |
149 | return QMimeMagicRule::matchSubstring(dataPtr: data.constData(), dataSize: data.size(), rangeStart: m_startPos, rangeLength, valueLength: m_pattern.size(), valueData: m_pattern.constData(), mask: m_mask.constData()); |
150 | } |
151 | |
152 | template <typename T> |
153 | bool QMimeMagicRule::matchNumber(const QByteArray &data) const |
154 | { |
155 | const T value(m_number); |
156 | const T mask(m_numberMask); |
157 | |
158 | //qDebug() << "matchNumber" << "0x" << QString::number(m_number, 16) << "size" << sizeof(T); |
159 | //qDebug() << "mask" << QString::number(m_numberMask, 16); |
160 | |
161 | const char *p = data.constData() + m_startPos; |
162 | const char *e = data.constData() + qMin(a: data.size() - int(sizeof(T)), b: m_endPos); |
163 | for ( ; p <= e; ++p) { |
164 | if ((qFromUnaligned<T>(p) & mask) == (value & mask)) |
165 | return true; |
166 | } |
167 | |
168 | return false; |
169 | } |
170 | |
171 | static inline QByteArray makePattern(const QByteArray &value) |
172 | { |
173 | QByteArray pattern(value.size(), Qt::Uninitialized); |
174 | char *data = pattern.data(); |
175 | |
176 | const char *p = value.constData(); |
177 | const char *e = p + value.size(); |
178 | for ( ; p < e; ++p) { |
179 | if (*p == '\\' && ++p < e) { |
180 | if (*p == 'x') { // hex (\\xff) |
181 | char c = 0; |
182 | for (int i = 0; i < 2 && p + 1 < e; ++i) { |
183 | ++p; |
184 | if (*p >= '0' && *p <= '9') |
185 | c = (c << 4) + *p - '0'; |
186 | else if (*p >= 'a' && *p <= 'f') |
187 | c = (c << 4) + *p - 'a' + 10; |
188 | else if (*p >= 'A' && *p <= 'F') |
189 | c = (c << 4) + *p - 'A' + 10; |
190 | else |
191 | continue; |
192 | } |
193 | *data++ = c; |
194 | } else if (*p >= '0' && *p <= '7') { // oct (\\7, or \\77, or \\377) |
195 | char c = *p - '0'; |
196 | if (p + 1 < e && p[1] >= '0' && p[1] <= '7') { |
197 | c = (c << 3) + *(++p) - '0'; |
198 | if (p + 1 < e && p[1] >= '0' && p[1] <= '7' && p[-1] <= '3') |
199 | c = (c << 3) + *(++p) - '0'; |
200 | } |
201 | *data++ = c; |
202 | } else if (*p == 'n') { |
203 | *data++ = '\n'; |
204 | } else if (*p == 'r') { |
205 | *data++ = '\r'; |
206 | } else if (*p == 't') { |
207 | *data++ = '\t'; |
208 | } else { // escaped |
209 | *data++ = *p; |
210 | } |
211 | } else { |
212 | *data++ = *p; |
213 | } |
214 | } |
215 | pattern.truncate(pos: data - pattern.data()); |
216 | |
217 | return pattern; |
218 | } |
219 | |
220 | // Evaluate a magic match rule like |
221 | // <match value="must be converted with BinHex" type="string" offset="11"/> |
222 | // <match value="0x9501" type="big16" offset="0:64"/> |
223 | |
224 | QMimeMagicRule::QMimeMagicRule(const QString &type, |
225 | const QByteArray &value, |
226 | const QString &offsets, |
227 | const QByteArray &mask, |
228 | QString *errorString) |
229 | : m_type(QMimeMagicRule::type(theTypeName: type.toLatin1())), |
230 | m_value(value), |
231 | m_mask(mask), |
232 | m_matchFunction(nullptr) |
233 | { |
234 | if (Q_UNLIKELY(m_type == Invalid)) |
235 | *errorString = QLatin1String("Type " ) + type + QLatin1String(" is not supported" ); |
236 | |
237 | // Parse for offset as "1" or "1:10" |
238 | const int colonIndex = offsets.indexOf(c: QLatin1Char(':')); |
239 | const QStringRef startPosStr = offsets.midRef(position: 0, n: colonIndex); // \ These decay to returning 'offsets' |
240 | const QStringRef endPosStr = offsets.midRef(position: colonIndex + 1);// / unchanged when colonIndex == -1 |
241 | if (Q_UNLIKELY(!QMimeTypeParserBase::parseNumber(startPosStr, &m_startPos, errorString)) || |
242 | Q_UNLIKELY(!QMimeTypeParserBase::parseNumber(endPosStr, &m_endPos, errorString))) { |
243 | m_type = Invalid; |
244 | return; |
245 | } |
246 | |
247 | if (Q_UNLIKELY(m_value.isEmpty())) { |
248 | m_type = Invalid; |
249 | if (errorString) |
250 | *errorString = QStringLiteral("Invalid empty magic rule value" ); |
251 | return; |
252 | } |
253 | |
254 | if (m_type >= Host16 && m_type <= Byte) { |
255 | bool ok; |
256 | m_number = m_value.toUInt(ok: &ok, base: 0); // autodetect base |
257 | if (Q_UNLIKELY(!ok)) { |
258 | m_type = Invalid; |
259 | if (errorString) |
260 | *errorString = QLatin1String("Invalid magic rule value \"" ) + QLatin1String(m_value) + QLatin1Char('"'); |
261 | return; |
262 | } |
263 | m_numberMask = !m_mask.isEmpty() ? m_mask.toUInt(ok: &ok, base: 0) : 0; // autodetect base |
264 | } |
265 | |
266 | switch (m_type) { |
267 | case String: |
268 | m_pattern = makePattern(value: m_value); |
269 | m_pattern.squeeze(); |
270 | if (!m_mask.isEmpty()) { |
271 | if (Q_UNLIKELY(m_mask.size() < 4 || !m_mask.startsWith("0x" ))) { |
272 | m_type = Invalid; |
273 | if (errorString) |
274 | *errorString = QLatin1String("Invalid magic rule mask \"" ) + QLatin1String(m_mask) + QLatin1Char('"'); |
275 | return; |
276 | } |
277 | const QByteArray &tempMask = QByteArray::fromHex(hexEncoded: QByteArray::fromRawData( |
278 | m_mask.constData() + 2, size: m_mask.size() - 2)); |
279 | if (Q_UNLIKELY(tempMask.size() != m_pattern.size())) { |
280 | m_type = Invalid; |
281 | if (errorString) |
282 | *errorString = QLatin1String("Invalid magic rule mask size \"" ) + QLatin1String(m_mask) + QLatin1Char('"'); |
283 | return; |
284 | } |
285 | m_mask = tempMask; |
286 | } else { |
287 | m_mask.fill(c: char(-1), size: m_pattern.size()); |
288 | } |
289 | m_mask.squeeze(); |
290 | m_matchFunction = &QMimeMagicRule::matchString; |
291 | break; |
292 | case Byte: |
293 | if (m_number <= quint8(-1)) { |
294 | if (m_numberMask == 0) |
295 | m_numberMask = quint8(-1); |
296 | m_matchFunction = &QMimeMagicRule::matchNumber<quint8>; |
297 | } |
298 | break; |
299 | case Big16: |
300 | case Little16: |
301 | if (m_number <= quint16(-1)) { |
302 | m_number = m_type == Little16 ? qFromLittleEndian<quint16>(source: m_number) : qFromBigEndian<quint16>(source: m_number); |
303 | if (m_numberMask != 0) |
304 | m_numberMask = m_type == Little16 ? qFromLittleEndian<quint16>(source: m_numberMask) : qFromBigEndian<quint16>(source: m_numberMask); |
305 | } |
306 | Q_FALLTHROUGH(); |
307 | case Host16: |
308 | if (m_number <= quint16(-1)) { |
309 | if (m_numberMask == 0) |
310 | m_numberMask = quint16(-1); |
311 | m_matchFunction = &QMimeMagicRule::matchNumber<quint16>; |
312 | } |
313 | break; |
314 | case Big32: |
315 | case Little32: |
316 | m_number = m_type == Little32 ? qFromLittleEndian<quint32>(source: m_number) : qFromBigEndian<quint32>(source: m_number); |
317 | if (m_numberMask != 0) |
318 | m_numberMask = m_type == Little32 ? qFromLittleEndian<quint32>(source: m_numberMask) : qFromBigEndian<quint32>(source: m_numberMask); |
319 | Q_FALLTHROUGH(); |
320 | case Host32: |
321 | if (m_numberMask == 0) |
322 | m_numberMask = quint32(-1); |
323 | m_matchFunction = &QMimeMagicRule::matchNumber<quint32>; |
324 | break; |
325 | default: |
326 | break; |
327 | } |
328 | } |
329 | |
330 | QByteArray QMimeMagicRule::mask() const |
331 | { |
332 | QByteArray result = m_mask; |
333 | if (m_type == String) { |
334 | // restore '0x' |
335 | result = "0x" + result.toHex(); |
336 | } |
337 | return result; |
338 | } |
339 | |
340 | bool QMimeMagicRule::matches(const QByteArray &data) const |
341 | { |
342 | const bool ok = m_matchFunction && (this->*m_matchFunction)(data); |
343 | if (!ok) |
344 | return false; |
345 | |
346 | // No submatch? Then we are done. |
347 | if (m_subMatches.isEmpty()) |
348 | return true; |
349 | |
350 | //qDebug() << "Checking" << m_subMatches.count() << "sub-rules"; |
351 | // Check that one of the submatches matches too |
352 | for ( QList<QMimeMagicRule>::const_iterator it = m_subMatches.begin(), end = m_subMatches.end() ; |
353 | it != end ; ++it ) { |
354 | if ((*it).matches(data)) { |
355 | // One of the hierarchies matched -> mimetype recognized. |
356 | return true; |
357 | } |
358 | } |
359 | return false; |
360 | |
361 | |
362 | } |
363 | |
364 | QT_END_NAMESPACE |
365 | |