1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include "qmimeglobpattern_p.h"
5
6#if QT_CONFIG(regularexpression)
7#include <QRegularExpression>
8#endif
9#include <QStringList>
10#include <QDebug>
11
12QT_BEGIN_NAMESPACE
13
14using namespace Qt::StringLiterals;
15
16/*!
17 \internal
18 \class QMimeGlobMatchResult
19 \inmodule QtCore
20 \brief The QMimeGlobMatchResult class accumulates results from glob matching.
21
22 Handles glob weights, and preferring longer matches over shorter matches.
23*/
24
25void QMimeGlobMatchResult::addMatch(const QString &mimeType, int weight, const QString &pattern,
26 qsizetype knownSuffixLength)
27{
28 if (m_allMatchingMimeTypes.contains(str: mimeType))
29 return;
30 // Is this a lower-weight pattern than the last match? Skip this match then.
31 if (weight < m_weight) {
32 m_allMatchingMimeTypes.append(t: mimeType);
33 return;
34 }
35 bool replace = weight > m_weight;
36 if (!replace) {
37 // Compare the length of the match
38 if (pattern.size() < m_matchingPatternLength)
39 return; // too short, ignore
40 else if (pattern.size() > m_matchingPatternLength) {
41 // longer: clear any previous match (like *.bz2, when pattern is *.tar.bz2)
42 replace = true;
43 }
44 }
45 if (replace) {
46 m_matchingMimeTypes.clear();
47 // remember the new "longer" length
48 m_matchingPatternLength = pattern.size();
49 m_weight = weight;
50 }
51 if (!m_matchingMimeTypes.contains(str: mimeType)) {
52 m_matchingMimeTypes.append(t: mimeType);
53 if (replace)
54 m_allMatchingMimeTypes.prepend(t: mimeType); // highest-weight first
55 else
56 m_allMatchingMimeTypes.append(t: mimeType);
57 m_knownSuffixLength = knownSuffixLength;
58 }
59}
60
61QMimeGlobPattern::PatternType QMimeGlobPattern::detectPatternType(const QString &pattern) const
62{
63 const qsizetype patternLength = pattern.size();
64 if (!patternLength)
65 return OtherPattern;
66
67 const qsizetype starCount = pattern.count(c: u'*');
68 const bool hasSquareBracket = pattern.indexOf(c: u'[') != -1;
69 const bool hasQuestionMark = pattern.indexOf(c: u'?') != -1;
70
71 if (!hasSquareBracket && !hasQuestionMark) {
72 if (starCount == 1) {
73 // Patterns like "*~", "*.extension"
74 if (pattern.at(i: 0) == u'*')
75 return SuffixPattern;
76 // Patterns like "README*" (well this is currently the only one like that...)
77 if (pattern.at(i: patternLength - 1) == u'*')
78 return PrefixPattern;
79 } else if (starCount == 0) {
80 // Names without any wildcards like "README"
81 return LiteralPattern;
82 }
83 }
84
85 if (pattern == "[0-9][0-9][0-9].vdr"_L1)
86 return VdrPattern;
87
88 if (pattern == "*.anim[1-9j]"_L1)
89 return AnimPattern;
90
91 return OtherPattern;
92}
93
94
95/*!
96 \internal
97 \class QMimeGlobPattern
98 \inmodule QtCore
99 \brief The QMimeGlobPattern class contains the glob pattern for file names for MIME type matching.
100
101 \sa QMimeType, QMimeDatabase, QMimeMagicRuleMatcher, QMimeMagicRule
102*/
103
104bool QMimeGlobPattern::matchFileName(const QString &inputFileName) const
105{
106 // "Applications MUST match globs case-insensitively, except when the case-sensitive
107 // attribute is set to true."
108 // The constructor takes care of putting case-insensitive patterns in lowercase.
109 const QString fileName = m_caseSensitivity == Qt::CaseInsensitive
110 ? inputFileName.toLower() : inputFileName;
111
112 const qsizetype patternLength = m_pattern.size();
113 if (!patternLength)
114 return false;
115 const qsizetype fileNameLength = fileName.size();
116
117 switch (m_patternType) {
118 case SuffixPattern: {
119 if (fileNameLength + 1 < patternLength)
120 return false;
121
122 const QChar *c1 = m_pattern.unicode() + patternLength - 1;
123 const QChar *c2 = fileName.unicode() + fileNameLength - 1;
124 int cnt = 1;
125 while (cnt < patternLength && *c1-- == *c2--)
126 ++cnt;
127 return cnt == patternLength;
128 }
129 case PrefixPattern: {
130 if (fileNameLength + 1 < patternLength)
131 return false;
132
133 const QChar *c1 = m_pattern.unicode();
134 const QChar *c2 = fileName.unicode();
135 int cnt = 1;
136 while (cnt < patternLength && *c1++ == *c2++)
137 ++cnt;
138 return cnt == patternLength;
139 }
140 case LiteralPattern:
141 return (m_pattern == fileName);
142 case VdrPattern: // "[0-9][0-9][0-9].vdr" case
143 return fileNameLength == 7
144 && fileName.at(i: 0).isDigit() && fileName.at(i: 1).isDigit() && fileName.at(i: 2).isDigit()
145 && QStringView{fileName}.mid(pos: 3, n: 4) == ".vdr"_L1;
146 case AnimPattern: { // "*.anim[1-9j]" case
147 if (fileNameLength < 6)
148 return false;
149 const QChar lastChar = fileName.at(i: fileNameLength - 1);
150 const bool lastCharOK = (lastChar.isDigit() && lastChar != u'0')
151 || lastChar == u'j';
152 return lastCharOK && QStringView{fileName}.mid(pos: fileNameLength - 6, n: 5) == ".anim"_L1;
153 }
154 case OtherPattern:
155 // Other fallback patterns: slow but correct method
156#if QT_CONFIG(regularexpression)
157 auto rx = QRegularExpression::fromWildcard(pattern: m_pattern);
158 return rx.match(subject: fileName).hasMatch();
159#else
160 return false;
161#endif
162 }
163 return false;
164}
165
166static bool isSimplePattern(const QString &pattern)
167{
168 // starts with "*.", has no other '*'
169 return pattern.lastIndexOf(c: u'*') == 0
170 && pattern.size() > 1
171 && pattern.at(i: 1) == u'.' // (other dots are OK, like *.tar.bz2)
172 // and contains no other special character
173 && !pattern.contains(c: u'?')
174 && !pattern.contains(c: u'[')
175 ;
176}
177
178static bool isFastPattern(const QString &pattern)
179{
180 // starts with "*.", has no other '*' and no other '.'
181 return pattern.lastIndexOf(c: u'*') == 0
182 && pattern.lastIndexOf(c: u'.') == 1
183 // and contains no other special character
184 && !pattern.contains(c: u'?')
185 && !pattern.contains(c: u'[')
186 ;
187}
188
189void QMimeAllGlobPatterns::addGlob(const QMimeGlobPattern &glob)
190{
191 const QString &pattern = glob.pattern();
192 Q_ASSERT(!pattern.isEmpty());
193
194 // Store each patterns into either m_fastPatternDict (*.txt, *.html etc. with default weight 50)
195 // or for the rest, like core.*, *.tar.bz2, *~, into highWeightPatternOffset (>50)
196 // or lowWeightPatternOffset (<=50)
197
198 if (glob.weight() == 50 && isFastPattern(pattern) && !glob.isCaseSensitive()) {
199 // The bulk of the patterns is *.foo with weight 50 --> those go into the fast patterns hash.
200 const QString extension = pattern.mid(position: 2).toLower();
201 QStringList &patterns = m_fastPatterns[extension]; // find or create
202 if (!patterns.contains(str: glob.mimeType()))
203 patterns.append(t: glob.mimeType());
204 } else {
205 if (glob.weight() > 50) {
206 if (!m_highWeightGlobs.hasPattern(mimeType: glob.mimeType(), pattern: glob.pattern()))
207 m_highWeightGlobs.append(t: glob);
208 } else {
209 if (!m_lowWeightGlobs.hasPattern(mimeType: glob.mimeType(), pattern: glob.pattern()))
210 m_lowWeightGlobs.append(t: glob);
211 }
212 }
213}
214
215void QMimeAllGlobPatterns::removeMimeType(const QString &mimeType)
216{
217 for (auto &x : m_fastPatterns)
218 x.removeAll(t: mimeType);
219 m_highWeightGlobs.removeMimeType(mimeType);
220 m_lowWeightGlobs.removeMimeType(mimeType);
221}
222
223void QMimeGlobPatternList::match(QMimeGlobMatchResult &result,
224 const QString &fileName) const
225{
226 for (const QMimeGlobPattern &glob : *this) {
227 if (glob.matchFileName(inputFileName: fileName)) {
228 const QString pattern = glob.pattern();
229 const qsizetype suffixLen = isSimplePattern(pattern) ? pattern.size() - strlen(s: "*.") : 0;
230 result.addMatch(mimeType: glob.mimeType(), weight: glob.weight(), pattern, knownSuffixLength: suffixLen);
231 }
232 }
233}
234
235void QMimeAllGlobPatterns::matchingGlobs(const QString &fileName, QMimeGlobMatchResult &result) const
236{
237 // First try the high weight matches (>50), if any.
238 m_highWeightGlobs.match(result, fileName);
239
240 // Now use the "fast patterns" dict, for simple *.foo patterns with weight 50
241 // (which is most of them, so this optimization is definitely worth it)
242 const qsizetype lastDot = fileName.lastIndexOf(c: u'.');
243 if (lastDot != -1) { // if no '.', skip the extension lookup
244 const qsizetype ext_len = fileName.size() - lastDot - 1;
245 const QString simpleExtension = fileName.right(n: ext_len).toLower();
246 // (toLower because fast patterns are always case-insensitive and saved as lowercase)
247
248 const QStringList matchingMimeTypes = m_fastPatterns.value(key: simpleExtension);
249 const QString simplePattern = "*."_L1 + simpleExtension;
250 for (const QString &mime : matchingMimeTypes)
251 result.addMatch(mimeType: mime, weight: 50, pattern: simplePattern, knownSuffixLength: simpleExtension.size());
252 // Can't return yet; *.tar.bz2 has to win over *.bz2, so we need the low-weight mimetypes anyway,
253 // at least those with weight 50.
254 }
255
256 // Finally, try the low weight matches (<=50)
257 m_lowWeightGlobs.match(result, fileName);
258}
259
260void QMimeAllGlobPatterns::clear()
261{
262 m_fastPatterns.clear();
263 m_highWeightGlobs.clear();
264 m_lowWeightGlobs.clear();
265}
266
267QT_END_NAMESPACE
268

// Source: qtbase/src/corelib/mimetypes/qmimeglobpattern.cpp