1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtCore module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40
41#define QT_NO_CAST_FROM_ASCII
42
43#include "qmimemagicrule_p.h"
44
45#include "qmimetypeparser_p.h"
46#include <QtCore/QList>
47#include <QtCore/QDebug>
48#include <qendian.h>
49
50QT_BEGIN_NAMESPACE
51
// Names of the magic rule types, stored back to back in one NUL-separated blob.
// The entries MUST stay in the same order as the Type enum, because
// magicRuleTypes_indices[] is indexed by the enum value.
static const char magicRuleTypes_string[] =
    "invalid\0"     // offset  0
    "string\0"      // offset  8
    "host16\0"      // offset 15
    "host32\0"      // offset 22
    "big16\0"       // offset 29
    "big32\0"       // offset 35
    "little16\0"    // offset 41
    "little32\0"    // offset 50
    "byte\0"        // offset 59
    "\0";           // offset 64 (empty end marker)

// Start offset of each name inside magicRuleTypes_string; the table is
// closed by the offset of the empty end marker followed by a final 0.
static const int magicRuleTypes_indices[] = {
    0,      // invalid
    8,      // string
    15,     // host16
    22,     // host32
    29,     // big16
    35,     // big32
    41,     // little16
    50,     // little32
    59,     // byte
    64,     // empty end marker
    0
};
68
69QMimeMagicRule::Type QMimeMagicRule::type(const QByteArray &theTypeName)
70{
71 for (int i = String; i <= Byte; ++i) {
72 if (theTypeName == magicRuleTypes_string + magicRuleTypes_indices[i])
73 return Type(i);
74 }
75 return Invalid;
76}
77
78QByteArray QMimeMagicRule::typeName(QMimeMagicRule::Type theType)
79{
80 return magicRuleTypes_string + magicRuleTypes_indices[theType];
81}
82
83bool QMimeMagicRule::operator==(const QMimeMagicRule &other) const
84{
85 return m_type == other.m_type &&
86 m_value == other.m_value &&
87 m_startPos == other.m_startPos &&
88 m_endPos == other.m_endPos &&
89 m_mask == other.m_mask &&
90 m_pattern == other.m_pattern &&
91 m_number == other.m_number &&
92 m_numberMask == other.m_numberMask &&
93 m_matchFunction == other.m_matchFunction;
94}
95
96// Used by both providers
97bool QMimeMagicRule::matchSubstring(const char *dataPtr, int dataSize, int rangeStart, int rangeLength,
98 int valueLength, const char *valueData, const char *mask)
99{
100 // Size of searched data.
101 // Example: value="ABC", rangeLength=3 -> we need 3+3-1=5 bytes (ABCxx,xABCx,xxABC would match)
102 const int dataNeeded = qMin(a: rangeLength + valueLength - 1, b: dataSize - rangeStart);
103
104 if (!mask) {
105 // callgrind says QByteArray::indexOf is much slower, since our strings are typically too
106 // short for be worth Boyer-Moore matching (1 to 71 bytes, 11 bytes on average).
107 bool found = false;
108 for (int i = rangeStart; i < rangeStart + rangeLength; ++i) {
109 if (i + valueLength > dataSize)
110 break;
111
112 if (memcmp(s1: valueData, s2: dataPtr + i, n: valueLength) == 0) {
113 found = true;
114 break;
115 }
116 }
117 if (!found)
118 return false;
119 } else {
120 bool found = false;
121 const char *readDataBase = dataPtr + rangeStart;
122 // Example (continued from above):
123 // deviceSize is 4, so dataNeeded was max'ed to 4.
124 // maxStartPos = 4 - 3 + 1 = 2, and indeed
125 // we need to check for a match a positions 0 and 1 (ABCx and xABC).
126 const int maxStartPos = dataNeeded - valueLength + 1;
127 for (int i = 0; i < maxStartPos; ++i) {
128 const char *d = readDataBase + i;
129 bool valid = true;
130 for (int idx = 0; idx < valueLength; ++idx) {
131 if (((*d++) & mask[idx]) != (valueData[idx] & mask[idx])) {
132 valid = false;
133 break;
134 }
135 }
136 if (valid)
137 found = true;
138 }
139 if (!found)
140 return false;
141 }
142 //qDebug() << "Found" << value << "in" << searchedData;
143 return true;
144}
145
146bool QMimeMagicRule::matchString(const QByteArray &data) const
147{
148 const int rangeLength = m_endPos - m_startPos + 1;
149 return QMimeMagicRule::matchSubstring(dataPtr: data.constData(), dataSize: data.size(), rangeStart: m_startPos, rangeLength, valueLength: m_pattern.size(), valueData: m_pattern.constData(), mask: m_mask.constData());
150}
151
152template <typename T>
153bool QMimeMagicRule::matchNumber(const QByteArray &data) const
154{
155 const T value(m_number);
156 const T mask(m_numberMask);
157
158 //qDebug() << "matchNumber" << "0x" << QString::number(m_number, 16) << "size" << sizeof(T);
159 //qDebug() << "mask" << QString::number(m_numberMask, 16);
160
161 const char *p = data.constData() + m_startPos;
162 const char *e = data.constData() + qMin(a: data.size() - int(sizeof(T)), b: m_endPos);
163 for ( ; p <= e; ++p) {
164 if ((qFromUnaligned<T>(p) & mask) == (value & mask))
165 return true;
166 }
167
168 return false;
169}
170
171static inline QByteArray makePattern(const QByteArray &value)
172{
173 QByteArray pattern(value.size(), Qt::Uninitialized);
174 char *data = pattern.data();
175
176 const char *p = value.constData();
177 const char *e = p + value.size();
178 for ( ; p < e; ++p) {
179 if (*p == '\\' && ++p < e) {
180 if (*p == 'x') { // hex (\\xff)
181 char c = 0;
182 for (int i = 0; i < 2 && p + 1 < e; ++i) {
183 ++p;
184 if (*p >= '0' && *p <= '9')
185 c = (c << 4) + *p - '0';
186 else if (*p >= 'a' && *p <= 'f')
187 c = (c << 4) + *p - 'a' + 10;
188 else if (*p >= 'A' && *p <= 'F')
189 c = (c << 4) + *p - 'A' + 10;
190 else
191 continue;
192 }
193 *data++ = c;
194 } else if (*p >= '0' && *p <= '7') { // oct (\\7, or \\77, or \\377)
195 char c = *p - '0';
196 if (p + 1 < e && p[1] >= '0' && p[1] <= '7') {
197 c = (c << 3) + *(++p) - '0';
198 if (p + 1 < e && p[1] >= '0' && p[1] <= '7' && p[-1] <= '3')
199 c = (c << 3) + *(++p) - '0';
200 }
201 *data++ = c;
202 } else if (*p == 'n') {
203 *data++ = '\n';
204 } else if (*p == 'r') {
205 *data++ = '\r';
206 } else if (*p == 't') {
207 *data++ = '\t';
208 } else { // escaped
209 *data++ = *p;
210 }
211 } else {
212 *data++ = *p;
213 }
214 }
215 pattern.truncate(pos: data - pattern.data());
216
217 return pattern;
218}
219
220// Evaluate a magic match rule like
221// <match value="must be converted with BinHex" type="string" offset="11"/>
222// <match value="0x9501" type="big16" offset="0:64"/>
223
224QMimeMagicRule::QMimeMagicRule(const QString &type,
225 const QByteArray &value,
226 const QString &offsets,
227 const QByteArray &mask,
228 QString *errorString)
229 : m_type(QMimeMagicRule::type(theTypeName: type.toLatin1())),
230 m_value(value),
231 m_mask(mask),
232 m_matchFunction(nullptr)
233{
234 if (Q_UNLIKELY(m_type == Invalid))
235 *errorString = QLatin1String("Type ") + type + QLatin1String(" is not supported");
236
237 // Parse for offset as "1" or "1:10"
238 const int colonIndex = offsets.indexOf(c: QLatin1Char(':'));
239 const QStringRef startPosStr = offsets.midRef(position: 0, n: colonIndex); // \ These decay to returning 'offsets'
240 const QStringRef endPosStr = offsets.midRef(position: colonIndex + 1);// / unchanged when colonIndex == -1
241 if (Q_UNLIKELY(!QMimeTypeParserBase::parseNumber(startPosStr, &m_startPos, errorString)) ||
242 Q_UNLIKELY(!QMimeTypeParserBase::parseNumber(endPosStr, &m_endPos, errorString))) {
243 m_type = Invalid;
244 return;
245 }
246
247 if (Q_UNLIKELY(m_value.isEmpty())) {
248 m_type = Invalid;
249 if (errorString)
250 *errorString = QStringLiteral("Invalid empty magic rule value");
251 return;
252 }
253
254 if (m_type >= Host16 && m_type <= Byte) {
255 bool ok;
256 m_number = m_value.toUInt(ok: &ok, base: 0); // autodetect base
257 if (Q_UNLIKELY(!ok)) {
258 m_type = Invalid;
259 if (errorString)
260 *errorString = QLatin1String("Invalid magic rule value \"") + QLatin1String(m_value) + QLatin1Char('"');
261 return;
262 }
263 m_numberMask = !m_mask.isEmpty() ? m_mask.toUInt(ok: &ok, base: 0) : 0; // autodetect base
264 }
265
266 switch (m_type) {
267 case String:
268 m_pattern = makePattern(value: m_value);
269 m_pattern.squeeze();
270 if (!m_mask.isEmpty()) {
271 if (Q_UNLIKELY(m_mask.size() < 4 || !m_mask.startsWith("0x"))) {
272 m_type = Invalid;
273 if (errorString)
274 *errorString = QLatin1String("Invalid magic rule mask \"") + QLatin1String(m_mask) + QLatin1Char('"');
275 return;
276 }
277 const QByteArray &tempMask = QByteArray::fromHex(hexEncoded: QByteArray::fromRawData(
278 m_mask.constData() + 2, size: m_mask.size() - 2));
279 if (Q_UNLIKELY(tempMask.size() != m_pattern.size())) {
280 m_type = Invalid;
281 if (errorString)
282 *errorString = QLatin1String("Invalid magic rule mask size \"") + QLatin1String(m_mask) + QLatin1Char('"');
283 return;
284 }
285 m_mask = tempMask;
286 } else {
287 m_mask.fill(c: char(-1), size: m_pattern.size());
288 }
289 m_mask.squeeze();
290 m_matchFunction = &QMimeMagicRule::matchString;
291 break;
292 case Byte:
293 if (m_number <= quint8(-1)) {
294 if (m_numberMask == 0)
295 m_numberMask = quint8(-1);
296 m_matchFunction = &QMimeMagicRule::matchNumber<quint8>;
297 }
298 break;
299 case Big16:
300 case Little16:
301 if (m_number <= quint16(-1)) {
302 m_number = m_type == Little16 ? qFromLittleEndian<quint16>(source: m_number) : qFromBigEndian<quint16>(source: m_number);
303 if (m_numberMask != 0)
304 m_numberMask = m_type == Little16 ? qFromLittleEndian<quint16>(source: m_numberMask) : qFromBigEndian<quint16>(source: m_numberMask);
305 }
306 Q_FALLTHROUGH();
307 case Host16:
308 if (m_number <= quint16(-1)) {
309 if (m_numberMask == 0)
310 m_numberMask = quint16(-1);
311 m_matchFunction = &QMimeMagicRule::matchNumber<quint16>;
312 }
313 break;
314 case Big32:
315 case Little32:
316 m_number = m_type == Little32 ? qFromLittleEndian<quint32>(source: m_number) : qFromBigEndian<quint32>(source: m_number);
317 if (m_numberMask != 0)
318 m_numberMask = m_type == Little32 ? qFromLittleEndian<quint32>(source: m_numberMask) : qFromBigEndian<quint32>(source: m_numberMask);
319 Q_FALLTHROUGH();
320 case Host32:
321 if (m_numberMask == 0)
322 m_numberMask = quint32(-1);
323 m_matchFunction = &QMimeMagicRule::matchNumber<quint32>;
324 break;
325 default:
326 break;
327 }
328}
329
330QByteArray QMimeMagicRule::mask() const
331{
332 QByteArray result = m_mask;
333 if (m_type == String) {
334 // restore '0x'
335 result = "0x" + result.toHex();
336 }
337 return result;
338}
339
340bool QMimeMagicRule::matches(const QByteArray &data) const
341{
342 const bool ok = m_matchFunction && (this->*m_matchFunction)(data);
343 if (!ok)
344 return false;
345
346 // No submatch? Then we are done.
347 if (m_subMatches.isEmpty())
348 return true;
349
350 //qDebug() << "Checking" << m_subMatches.count() << "sub-rules";
351 // Check that one of the submatches matches too
352 for ( QList<QMimeMagicRule>::const_iterator it = m_subMatches.begin(), end = m_subMatches.end() ;
353 it != end ; ++it ) {
354 if ((*it).matches(data)) {
355 // One of the hierarchies matched -> mimetype recognized.
356 return true;
357 }
358 }
359 return false;
360
361
362}
363
364QT_END_NAMESPACE
365

// Source code of qtbase/src/corelib/mimetypes/qmimemagicrule.cpp