1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4
5#define QT_NO_CAST_FROM_ASCII
6
7#include "qmimemagicrule_p.h"
8
9#include "qmimetypeparser_p.h"
10#include <QtCore/QList>
11#include <QtCore/QMap>
12#include <QtCore/QDebug>
13#include <qendian.h>
14
15#include <private/qoffsetstringarray_p.h>
16#include <private/qtools_p.h>
17
18QT_BEGIN_NAMESPACE
19
20using namespace Qt::StringLiterals;
21using namespace QtMiscUtils;
22
23// in the same order as Type!
24static constexpr auto magicRuleTypes = qOffsetStringArray(
25 strings: "invalid",
26 strings: "string",
27 strings: "host16",
28 strings: "host32",
29 strings: "big16",
30 strings: "big32",
31 strings: "little16",
32 strings: "little32",
33 strings: "byte"
34);
35
36QMimeMagicRule::Type QMimeMagicRule::type(const QByteArray &theTypeName)
37{
38 for (int i = String; i <= Byte; ++i) {
39 if (theTypeName == magicRuleTypes.at(index: i))
40 return Type(i);
41 }
42 return Invalid;
43}
44
45QByteArray QMimeMagicRule::typeName(QMimeMagicRule::Type theType)
46{
47 return magicRuleTypes.at(index: theType);
48}
49
50bool QMimeMagicRule::operator==(const QMimeMagicRule &other) const
51{
52 return m_type == other.m_type &&
53 m_value == other.m_value &&
54 m_startPos == other.m_startPos &&
55 m_endPos == other.m_endPos &&
56 m_mask == other.m_mask &&
57 m_pattern == other.m_pattern &&
58 m_number == other.m_number &&
59 m_numberMask == other.m_numberMask &&
60 m_matchFunction == other.m_matchFunction;
61}
62
63// Used by both providers
64bool QMimeMagicRule::matchSubstring(const char *dataPtr, qsizetype dataSize, int rangeStart, int rangeLength,
65 qsizetype valueLength, const char *valueData, const char *mask)
66{
67 // Size of searched data.
68 // Example: value="ABC", rangeLength=3 -> we need 3+3-1=5 bytes (ABCxx,xABCx,xxABC would match)
69 const qsizetype dataNeeded = qMin(a: rangeLength + valueLength - 1, b: dataSize - rangeStart);
70
71 if (!mask) {
72 // callgrind says QByteArray::indexOf is much slower, since our strings are typically too
73 // short for be worth Boyer-Moore matching (1 to 71 bytes, 11 bytes on average).
74 bool found = false;
75 for (int i = rangeStart; i < rangeStart + rangeLength; ++i) {
76 if (i + valueLength > dataSize)
77 break;
78
79 if (memcmp(s1: valueData, s2: dataPtr + i, n: valueLength) == 0) {
80 found = true;
81 break;
82 }
83 }
84 if (!found)
85 return false;
86 } else {
87 bool found = false;
88 const char *readDataBase = dataPtr + rangeStart;
89 // Example (continued from above):
90 // deviceSize is 4, so dataNeeded was max'ed to 4.
91 // maxStartPos = 4 - 3 + 1 = 2, and indeed
92 // we need to check for a match a positions 0 and 1 (ABCx and xABC).
93 const qsizetype maxStartPos = dataNeeded - valueLength + 1;
94 for (int i = 0; i < maxStartPos; ++i) {
95 const char *d = readDataBase + i;
96 bool valid = true;
97 for (int idx = 0; idx < valueLength; ++idx) {
98 if (((*d++) & mask[idx]) != (valueData[idx] & mask[idx])) {
99 valid = false;
100 break;
101 }
102 }
103 if (valid)
104 found = true;
105 }
106 if (!found)
107 return false;
108 }
109 //qDebug() << "Found" << value << "in" << searchedData;
110 return true;
111}
112
113bool QMimeMagicRule::matchString(const QByteArray &data) const
114{
115 const int rangeLength = m_endPos - m_startPos + 1;
116 return QMimeMagicRule::matchSubstring(dataPtr: data.constData(), dataSize: data.size(), rangeStart: m_startPos, rangeLength, valueLength: m_pattern.size(), valueData: m_pattern.constData(), mask: m_mask.constData());
117}
118
119template <typename T>
120bool QMimeMagicRule::matchNumber(const QByteArray &data) const
121{
122 const T value(m_number);
123 const T mask(m_numberMask);
124
125 //qDebug() << "matchNumber" << "0x" << QString::number(m_number, 16) << "size" << sizeof(T);
126 //qDebug() << "mask" << QString::number(m_numberMask, 16);
127
128 const char *p = data.constData() + m_startPos;
129 const char *e = data.constData() + qMin(a: data.size() - int(sizeof(T)), b: m_endPos);
130 for ( ; p <= e; ++p) {
131 if ((qFromUnaligned<T>(p) & mask) == (value & mask))
132 return true;
133 }
134
135 return false;
136}
137
138static inline QByteArray makePattern(const QByteArray &value)
139{
140 QByteArray pattern(value.size(), Qt::Uninitialized);
141 char *data = pattern.data();
142
143 const char *p = value.constData();
144 const char *e = p + value.size();
145 for ( ; p < e; ++p) {
146 if (*p == '\\' && ++p < e) {
147 if (*p == 'x') { // hex (\\xff)
148 char c = 0;
149 for (int i = 0; i < 2 && p + 1 < e; ++i) {
150 ++p;
151 if (const int h = fromHex(c: *p); h != -1)
152 c = (c << 4) + h;
153 else
154 continue;
155 }
156 *data++ = c;
157 } else if (isOctalDigit(c: *p)) { // oct (\\7, or \\77, or \\377)
158 char c = *p - '0';
159 if (p + 1 < e && isOctalDigit(c: p[1])) {
160 c = (c << 3) + *(++p) - '0';
161 if (p + 1 < e && isOctalDigit(c: p[1]) && p[-1] <= '3')
162 c = (c << 3) + *(++p) - '0';
163 }
164 *data++ = c;
165 } else if (*p == 'n') {
166 *data++ = '\n';
167 } else if (*p == 'r') {
168 *data++ = '\r';
169 } else if (*p == 't') {
170 *data++ = '\t';
171 } else { // escaped
172 *data++ = *p;
173 }
174 } else {
175 *data++ = *p;
176 }
177 }
178 pattern.truncate(pos: data - pattern.data());
179
180 return pattern;
181}
182
183// Evaluate a magic match rule like
184// <match value="must be converted with BinHex" type="string" offset="11"/>
185// <match value="0x9501" type="big16" offset="0:64"/>
186
187QMimeMagicRule::QMimeMagicRule(const QString &type,
188 const QByteArray &value,
189 const QString &offsets,
190 const QByteArray &mask,
191 QString *errorString)
192 : m_type(QMimeMagicRule::type(theTypeName: type.toLatin1())),
193 m_value(value),
194 m_mask(mask),
195 m_matchFunction(nullptr)
196{
197 if (Q_UNLIKELY(m_type == Invalid)) {
198 if (errorString)
199 *errorString = "Type "_L1 + type + " is not supported"_L1;
200 return;
201 }
202
203 // Parse for offset as "1" or "1:10"
204 const qsizetype colonIndex = offsets.indexOf(c: u':');
205 const QStringView startPosStr = QStringView{offsets}.mid(pos: 0, n: colonIndex); // \ These decay to returning 'offsets'
206 const QStringView endPosStr = QStringView{offsets}.mid(pos: colonIndex + 1);// / unchanged when colonIndex == -1
207 if (Q_UNLIKELY(!QMimeTypeParserBase::parseNumber(startPosStr, &m_startPos, errorString)) ||
208 Q_UNLIKELY(!QMimeTypeParserBase::parseNumber(endPosStr, &m_endPos, errorString))) {
209 m_type = Invalid;
210 return;
211 }
212
213 if (Q_UNLIKELY(m_value.isEmpty())) {
214 m_type = Invalid;
215 if (errorString)
216 *errorString = QStringLiteral("Invalid empty magic rule value");
217 return;
218 }
219
220 if (m_type >= Host16 && m_type <= Byte) {
221 bool ok;
222 m_number = m_value.toUInt(ok: &ok, base: 0); // autodetect base
223 if (Q_UNLIKELY(!ok)) {
224 m_type = Invalid;
225 if (errorString)
226 *errorString = "Invalid magic rule value \""_L1 + QLatin1StringView(m_value) + u'"';
227 return;
228 }
229 m_numberMask = !m_mask.isEmpty() ? m_mask.toUInt(ok: &ok, base: 0) : 0; // autodetect base
230 }
231
232 switch (m_type) {
233 case String:
234 m_pattern = makePattern(value: m_value);
235 m_pattern.squeeze();
236 if (!m_mask.isEmpty()) {
237 if (Q_UNLIKELY(m_mask.size() < 4 || !m_mask.startsWith("0x"))) {
238 m_type = Invalid;
239 if (errorString)
240 *errorString = "Invalid magic rule mask \""_L1 + QLatin1StringView(m_mask) + u'"';
241 return;
242 }
243 const QByteArray &tempMask = QByteArray::fromHex(hexEncoded: QByteArray::fromRawData(
244 data: m_mask.constData() + 2, size: m_mask.size() - 2));
245 if (Q_UNLIKELY(tempMask.size() != m_pattern.size())) {
246 m_type = Invalid;
247 if (errorString)
248 *errorString = "Invalid magic rule mask size \""_L1 + QLatin1StringView(m_mask) + u'"';
249 return;
250 }
251 m_mask = tempMask;
252 } else {
253 m_mask.fill(c: char(-1), size: m_pattern.size());
254 }
255 m_mask.squeeze();
256 m_matchFunction = &QMimeMagicRule::matchString;
257 break;
258 case Byte:
259 if (m_number <= quint8(-1)) {
260 if (m_numberMask == 0)
261 m_numberMask = quint8(-1);
262 m_matchFunction = &QMimeMagicRule::matchNumber<quint8>;
263 }
264 break;
265 case Big16:
266 case Little16:
267 if (m_number <= quint16(-1)) {
268 m_number = m_type == Little16 ? qFromLittleEndian<quint16>(source: m_number) : qFromBigEndian<quint16>(source: m_number);
269 if (m_numberMask != 0)
270 m_numberMask = m_type == Little16 ? qFromLittleEndian<quint16>(source: m_numberMask) : qFromBigEndian<quint16>(source: m_numberMask);
271 }
272 Q_FALLTHROUGH();
273 case Host16:
274 if (m_number <= quint16(-1)) {
275 if (m_numberMask == 0)
276 m_numberMask = quint16(-1);
277 m_matchFunction = &QMimeMagicRule::matchNumber<quint16>;
278 }
279 break;
280 case Big32:
281 case Little32:
282 m_number = m_type == Little32 ? qFromLittleEndian<quint32>(source: m_number) : qFromBigEndian<quint32>(source: m_number);
283 if (m_numberMask != 0)
284 m_numberMask = m_type == Little32 ? qFromLittleEndian<quint32>(source: m_numberMask) : qFromBigEndian<quint32>(source: m_numberMask);
285 Q_FALLTHROUGH();
286 case Host32:
287 if (m_numberMask == 0)
288 m_numberMask = quint32(-1);
289 m_matchFunction = &QMimeMagicRule::matchNumber<quint32>;
290 break;
291 default:
292 break;
293 }
294}
295
296QByteArray QMimeMagicRule::mask() const
297{
298 QByteArray result = m_mask;
299 if (m_type == String) {
300 // restore '0x'
301 result = "0x" + result.toHex();
302 }
303 return result;
304}
305
306bool QMimeMagicRule::matches(const QByteArray &data) const
307{
308 const bool ok = m_matchFunction && (this->*m_matchFunction)(data);
309 if (!ok)
310 return false;
311
312 // No submatch? Then we are done.
313 if (m_subMatches.isEmpty())
314 return true;
315
316 //qDebug() << "Checking" << m_subMatches.count() << "sub-rules";
317 // Check that one of the submatches matches too
318 for ( QList<QMimeMagicRule>::const_iterator it = m_subMatches.begin(), end = m_subMatches.end() ;
319 it != end ; ++it ) {
320 if ((*it).matches(data)) {
321 // One of the hierarchies matched -> mimetype recognized.
322 return true;
323 }
324 }
325 return false;
326
327
328}
329
330QT_END_NAMESPACE
331

// Source: qtbase/src/corelib/mimetypes/qmimemagicrule.cpp