1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | |
5 | #define QT_NO_CAST_FROM_ASCII |
6 | |
7 | #include "qmimemagicrule_p.h" |
8 | |
9 | #include "qmimetypeparser_p.h" |
10 | #include <QtCore/QList> |
11 | #include <QtCore/QMap> |
12 | #include <QtCore/QDebug> |
13 | #include <qendian.h> |
14 | |
15 | #include <private/qoffsetstringarray_p.h> |
16 | #include <private/qtools_p.h> |
17 | |
18 | QT_BEGIN_NAMESPACE |
19 | |
20 | using namespace Qt::StringLiterals; |
21 | using namespace QtMiscUtils; |
22 | |
23 | // in the same order as Type! |
24 | static constexpr auto magicRuleTypes = qOffsetStringArray( |
25 | strings: "invalid" , |
26 | strings: "string" , |
27 | strings: "host16" , |
28 | strings: "host32" , |
29 | strings: "big16" , |
30 | strings: "big32" , |
31 | strings: "little16" , |
32 | strings: "little32" , |
33 | strings: "byte" |
34 | ); |
35 | |
36 | QMimeMagicRule::Type QMimeMagicRule::type(const QByteArray &theTypeName) |
37 | { |
38 | for (int i = String; i <= Byte; ++i) { |
39 | if (theTypeName == magicRuleTypes.at(index: i)) |
40 | return Type(i); |
41 | } |
42 | return Invalid; |
43 | } |
44 | |
45 | QByteArray QMimeMagicRule::typeName(QMimeMagicRule::Type theType) |
46 | { |
47 | return magicRuleTypes.at(index: theType); |
48 | } |
49 | |
50 | bool QMimeMagicRule::operator==(const QMimeMagicRule &other) const |
51 | { |
52 | return m_type == other.m_type && |
53 | m_value == other.m_value && |
54 | m_startPos == other.m_startPos && |
55 | m_endPos == other.m_endPos && |
56 | m_mask == other.m_mask && |
57 | m_pattern == other.m_pattern && |
58 | m_number == other.m_number && |
59 | m_numberMask == other.m_numberMask && |
60 | m_matchFunction == other.m_matchFunction; |
61 | } |
62 | |
63 | // Used by both providers |
64 | bool QMimeMagicRule::matchSubstring(const char *dataPtr, qsizetype dataSize, int rangeStart, int rangeLength, |
65 | qsizetype valueLength, const char *valueData, const char *mask) |
66 | { |
67 | // Size of searched data. |
68 | // Example: value="ABC", rangeLength=3 -> we need 3+3-1=5 bytes (ABCxx,xABCx,xxABC would match) |
69 | const qsizetype dataNeeded = qMin(a: rangeLength + valueLength - 1, b: dataSize - rangeStart); |
70 | |
71 | if (!mask) { |
72 | // callgrind says QByteArray::indexOf is much slower, since our strings are typically too |
73 | // short for be worth Boyer-Moore matching (1 to 71 bytes, 11 bytes on average). |
74 | bool found = false; |
75 | for (int i = rangeStart; i < rangeStart + rangeLength; ++i) { |
76 | if (i + valueLength > dataSize) |
77 | break; |
78 | |
79 | if (memcmp(s1: valueData, s2: dataPtr + i, n: valueLength) == 0) { |
80 | found = true; |
81 | break; |
82 | } |
83 | } |
84 | if (!found) |
85 | return false; |
86 | } else { |
87 | bool found = false; |
88 | const char *readDataBase = dataPtr + rangeStart; |
89 | // Example (continued from above): |
90 | // deviceSize is 4, so dataNeeded was max'ed to 4. |
91 | // maxStartPos = 4 - 3 + 1 = 2, and indeed |
92 | // we need to check for a match a positions 0 and 1 (ABCx and xABC). |
93 | const qsizetype maxStartPos = dataNeeded - valueLength + 1; |
94 | for (int i = 0; i < maxStartPos; ++i) { |
95 | const char *d = readDataBase + i; |
96 | bool valid = true; |
97 | for (int idx = 0; idx < valueLength; ++idx) { |
98 | if (((*d++) & mask[idx]) != (valueData[idx] & mask[idx])) { |
99 | valid = false; |
100 | break; |
101 | } |
102 | } |
103 | if (valid) |
104 | found = true; |
105 | } |
106 | if (!found) |
107 | return false; |
108 | } |
109 | //qDebug() << "Found" << value << "in" << searchedData; |
110 | return true; |
111 | } |
112 | |
113 | bool QMimeMagicRule::matchString(const QByteArray &data) const |
114 | { |
115 | const int rangeLength = m_endPos - m_startPos + 1; |
116 | return QMimeMagicRule::matchSubstring(dataPtr: data.constData(), dataSize: data.size(), rangeStart: m_startPos, rangeLength, valueLength: m_pattern.size(), valueData: m_pattern.constData(), mask: m_mask.constData()); |
117 | } |
118 | |
119 | template <typename T> |
120 | bool QMimeMagicRule::matchNumber(const QByteArray &data) const |
121 | { |
122 | const T value(m_number); |
123 | const T mask(m_numberMask); |
124 | |
125 | //qDebug() << "matchNumber" << "0x" << QString::number(m_number, 16) << "size" << sizeof(T); |
126 | //qDebug() << "mask" << QString::number(m_numberMask, 16); |
127 | |
128 | const char *p = data.constData() + m_startPos; |
129 | const char *e = data.constData() + qMin(a: data.size() - int(sizeof(T)), b: m_endPos); |
130 | for ( ; p <= e; ++p) { |
131 | if ((qFromUnaligned<T>(p) & mask) == (value & mask)) |
132 | return true; |
133 | } |
134 | |
135 | return false; |
136 | } |
137 | |
138 | static inline QByteArray makePattern(const QByteArray &value) |
139 | { |
140 | QByteArray pattern(value.size(), Qt::Uninitialized); |
141 | char *data = pattern.data(); |
142 | |
143 | const char *p = value.constData(); |
144 | const char *e = p + value.size(); |
145 | for ( ; p < e; ++p) { |
146 | if (*p == '\\' && ++p < e) { |
147 | if (*p == 'x') { // hex (\\xff) |
148 | char c = 0; |
149 | for (int i = 0; i < 2 && p + 1 < e; ++i) { |
150 | ++p; |
151 | if (const int h = fromHex(c: *p); h != -1) |
152 | c = (c << 4) + h; |
153 | else |
154 | continue; |
155 | } |
156 | *data++ = c; |
157 | } else if (isOctalDigit(c: *p)) { // oct (\\7, or \\77, or \\377) |
158 | char c = *p - '0'; |
159 | if (p + 1 < e && isOctalDigit(c: p[1])) { |
160 | c = (c << 3) + *(++p) - '0'; |
161 | if (p + 1 < e && isOctalDigit(c: p[1]) && p[-1] <= '3') |
162 | c = (c << 3) + *(++p) - '0'; |
163 | } |
164 | *data++ = c; |
165 | } else if (*p == 'n') { |
166 | *data++ = '\n'; |
167 | } else if (*p == 'r') { |
168 | *data++ = '\r'; |
169 | } else if (*p == 't') { |
170 | *data++ = '\t'; |
171 | } else { // escaped |
172 | *data++ = *p; |
173 | } |
174 | } else { |
175 | *data++ = *p; |
176 | } |
177 | } |
178 | pattern.truncate(pos: data - pattern.data()); |
179 | |
180 | return pattern; |
181 | } |
182 | |
183 | // Evaluate a magic match rule like |
184 | // <match value="must be converted with BinHex" type="string" offset="11"/> |
185 | // <match value="0x9501" type="big16" offset="0:64"/> |
186 | |
187 | QMimeMagicRule::QMimeMagicRule(const QString &type, |
188 | const QByteArray &value, |
189 | const QString &offsets, |
190 | const QByteArray &mask, |
191 | QString *errorString) |
192 | : m_type(QMimeMagicRule::type(theTypeName: type.toLatin1())), |
193 | m_value(value), |
194 | m_mask(mask), |
195 | m_matchFunction(nullptr) |
196 | { |
197 | if (Q_UNLIKELY(m_type == Invalid)) { |
198 | if (errorString) |
199 | *errorString = "Type "_L1 + type + " is not supported"_L1 ; |
200 | return; |
201 | } |
202 | |
203 | // Parse for offset as "1" or "1:10" |
204 | const qsizetype colonIndex = offsets.indexOf(c: u':'); |
205 | const QStringView startPosStr = QStringView{offsets}.mid(pos: 0, n: colonIndex); // \ These decay to returning 'offsets' |
206 | const QStringView endPosStr = QStringView{offsets}.mid(pos: colonIndex + 1);// / unchanged when colonIndex == -1 |
207 | if (Q_UNLIKELY(!QMimeTypeParserBase::parseNumber(startPosStr, &m_startPos, errorString)) || |
208 | Q_UNLIKELY(!QMimeTypeParserBase::parseNumber(endPosStr, &m_endPos, errorString))) { |
209 | m_type = Invalid; |
210 | return; |
211 | } |
212 | |
213 | if (Q_UNLIKELY(m_value.isEmpty())) { |
214 | m_type = Invalid; |
215 | if (errorString) |
216 | *errorString = QStringLiteral("Invalid empty magic rule value" ); |
217 | return; |
218 | } |
219 | |
220 | if (m_type >= Host16 && m_type <= Byte) { |
221 | bool ok; |
222 | m_number = m_value.toUInt(ok: &ok, base: 0); // autodetect base |
223 | if (Q_UNLIKELY(!ok)) { |
224 | m_type = Invalid; |
225 | if (errorString) |
226 | *errorString = "Invalid magic rule value \""_L1 + QLatin1StringView(m_value) + u'"'; |
227 | return; |
228 | } |
229 | m_numberMask = !m_mask.isEmpty() ? m_mask.toUInt(ok: &ok, base: 0) : 0; // autodetect base |
230 | } |
231 | |
232 | switch (m_type) { |
233 | case String: |
234 | m_pattern = makePattern(value: m_value); |
235 | m_pattern.squeeze(); |
236 | if (!m_mask.isEmpty()) { |
237 | if (Q_UNLIKELY(m_mask.size() < 4 || !m_mask.startsWith("0x" ))) { |
238 | m_type = Invalid; |
239 | if (errorString) |
240 | *errorString = "Invalid magic rule mask \""_L1 + QLatin1StringView(m_mask) + u'"'; |
241 | return; |
242 | } |
243 | const QByteArray &tempMask = QByteArray::fromHex(hexEncoded: QByteArray::fromRawData( |
244 | data: m_mask.constData() + 2, size: m_mask.size() - 2)); |
245 | if (Q_UNLIKELY(tempMask.size() != m_pattern.size())) { |
246 | m_type = Invalid; |
247 | if (errorString) |
248 | *errorString = "Invalid magic rule mask size \""_L1 + QLatin1StringView(m_mask) + u'"'; |
249 | return; |
250 | } |
251 | m_mask = tempMask; |
252 | } else { |
253 | m_mask.fill(c: char(-1), size: m_pattern.size()); |
254 | } |
255 | m_mask.squeeze(); |
256 | m_matchFunction = &QMimeMagicRule::matchString; |
257 | break; |
258 | case Byte: |
259 | if (m_number <= quint8(-1)) { |
260 | if (m_numberMask == 0) |
261 | m_numberMask = quint8(-1); |
262 | m_matchFunction = &QMimeMagicRule::matchNumber<quint8>; |
263 | } |
264 | break; |
265 | case Big16: |
266 | case Little16: |
267 | if (m_number <= quint16(-1)) { |
268 | m_number = m_type == Little16 ? qFromLittleEndian<quint16>(source: m_number) : qFromBigEndian<quint16>(source: m_number); |
269 | if (m_numberMask != 0) |
270 | m_numberMask = m_type == Little16 ? qFromLittleEndian<quint16>(source: m_numberMask) : qFromBigEndian<quint16>(source: m_numberMask); |
271 | } |
272 | Q_FALLTHROUGH(); |
273 | case Host16: |
274 | if (m_number <= quint16(-1)) { |
275 | if (m_numberMask == 0) |
276 | m_numberMask = quint16(-1); |
277 | m_matchFunction = &QMimeMagicRule::matchNumber<quint16>; |
278 | } |
279 | break; |
280 | case Big32: |
281 | case Little32: |
282 | m_number = m_type == Little32 ? qFromLittleEndian<quint32>(source: m_number) : qFromBigEndian<quint32>(source: m_number); |
283 | if (m_numberMask != 0) |
284 | m_numberMask = m_type == Little32 ? qFromLittleEndian<quint32>(source: m_numberMask) : qFromBigEndian<quint32>(source: m_numberMask); |
285 | Q_FALLTHROUGH(); |
286 | case Host32: |
287 | if (m_numberMask == 0) |
288 | m_numberMask = quint32(-1); |
289 | m_matchFunction = &QMimeMagicRule::matchNumber<quint32>; |
290 | break; |
291 | default: |
292 | break; |
293 | } |
294 | } |
295 | |
296 | QByteArray QMimeMagicRule::mask() const |
297 | { |
298 | QByteArray result = m_mask; |
299 | if (m_type == String) { |
300 | // restore '0x' |
301 | result = "0x" + result.toHex(); |
302 | } |
303 | return result; |
304 | } |
305 | |
306 | bool QMimeMagicRule::matches(const QByteArray &data) const |
307 | { |
308 | const bool ok = m_matchFunction && (this->*m_matchFunction)(data); |
309 | if (!ok) |
310 | return false; |
311 | |
312 | // No submatch? Then we are done. |
313 | if (m_subMatches.isEmpty()) |
314 | return true; |
315 | |
316 | //qDebug() << "Checking" << m_subMatches.count() << "sub-rules"; |
317 | // Check that one of the submatches matches too |
318 | for ( QList<QMimeMagicRule>::const_iterator it = m_subMatches.begin(), end = m_subMatches.end() ; |
319 | it != end ; ++it ) { |
320 | if ((*it).matches(data)) { |
321 | // One of the hierarchies matched -> mimetype recognized. |
322 | return true; |
323 | } |
324 | } |
325 | return false; |
326 | |
327 | |
328 | } |
329 | |
330 | QT_END_NAMESPACE |
331 | |