1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include "qmimeglobpattern_p.h" |
5 | |
6 | #if QT_CONFIG(regularexpression) |
7 | #include <QRegularExpression> |
8 | #endif |
9 | #include <QStringList> |
10 | #include <QDebug> |
11 | |
12 | QT_BEGIN_NAMESPACE |
13 | |
14 | using namespace Qt::StringLiterals; |
15 | |
16 | /*! |
17 | \internal |
18 | \class QMimeGlobMatchResult |
19 | \inmodule QtCore |
20 | \brief The QMimeGlobMatchResult class accumulates results from glob matching. |
21 | |
22 | Handles glob weights, and preferring longer matches over shorter matches. |
23 | */ |
24 | |
25 | void QMimeGlobMatchResult::addMatch(const QString &mimeType, int weight, const QString &pattern, |
26 | qsizetype knownSuffixLength) |
27 | { |
28 | if (m_allMatchingMimeTypes.contains(str: mimeType)) |
29 | return; |
30 | // Is this a lower-weight pattern than the last match? Skip this match then. |
31 | if (weight < m_weight) { |
32 | m_allMatchingMimeTypes.append(t: mimeType); |
33 | return; |
34 | } |
35 | bool replace = weight > m_weight; |
36 | if (!replace) { |
37 | // Compare the length of the match |
38 | if (pattern.size() < m_matchingPatternLength) |
39 | return; // too short, ignore |
40 | else if (pattern.size() > m_matchingPatternLength) { |
41 | // longer: clear any previous match (like *.bz2, when pattern is *.tar.bz2) |
42 | replace = true; |
43 | } |
44 | } |
45 | if (replace) { |
46 | m_matchingMimeTypes.clear(); |
47 | // remember the new "longer" length |
48 | m_matchingPatternLength = pattern.size(); |
49 | m_weight = weight; |
50 | } |
51 | if (!m_matchingMimeTypes.contains(str: mimeType)) { |
52 | m_matchingMimeTypes.append(t: mimeType); |
53 | if (replace) |
54 | m_allMatchingMimeTypes.prepend(t: mimeType); // highest-weight first |
55 | else |
56 | m_allMatchingMimeTypes.append(t: mimeType); |
57 | m_knownSuffixLength = knownSuffixLength; |
58 | } |
59 | } |
60 | |
61 | QMimeGlobPattern::PatternType QMimeGlobPattern::detectPatternType(const QString &pattern) const |
62 | { |
63 | const qsizetype patternLength = pattern.size(); |
64 | if (!patternLength) |
65 | return OtherPattern; |
66 | |
67 | const qsizetype starCount = pattern.count(c: u'*'); |
68 | const bool hasSquareBracket = pattern.indexOf(c: u'[') != -1; |
69 | const bool hasQuestionMark = pattern.indexOf(c: u'?') != -1; |
70 | |
71 | if (!hasSquareBracket && !hasQuestionMark) { |
72 | if (starCount == 1) { |
73 | // Patterns like "*~", "*.extension" |
74 | if (pattern.at(i: 0) == u'*') |
75 | return SuffixPattern; |
76 | // Patterns like "README*" (well this is currently the only one like that...) |
77 | if (pattern.at(i: patternLength - 1) == u'*') |
78 | return PrefixPattern; |
79 | } else if (starCount == 0) { |
80 | // Names without any wildcards like "README" |
81 | return LiteralPattern; |
82 | } |
83 | } |
84 | |
85 | if (pattern == "[0-9][0-9][0-9].vdr"_L1 ) |
86 | return VdrPattern; |
87 | |
88 | if (pattern == "*.anim[1-9j]"_L1 ) |
89 | return AnimPattern; |
90 | |
91 | return OtherPattern; |
92 | } |
93 | |
94 | |
95 | /*! |
96 | \internal |
97 | \class QMimeGlobPattern |
98 | \inmodule QtCore |
99 | \brief The QMimeGlobPattern class contains the glob pattern for file names for MIME type matching. |
100 | |
101 | \sa QMimeType, QMimeDatabase, QMimeMagicRuleMatcher, QMimeMagicRule |
102 | */ |
103 | |
104 | bool QMimeGlobPattern::matchFileName(const QString &inputFileName) const |
105 | { |
106 | // "Applications MUST match globs case-insensitively, except when the case-sensitive |
107 | // attribute is set to true." |
108 | // The constructor takes care of putting case-insensitive patterns in lowercase. |
109 | const QString fileName = m_caseSensitivity == Qt::CaseInsensitive |
110 | ? inputFileName.toLower() : inputFileName; |
111 | |
112 | const qsizetype patternLength = m_pattern.size(); |
113 | if (!patternLength) |
114 | return false; |
115 | const qsizetype fileNameLength = fileName.size(); |
116 | |
117 | switch (m_patternType) { |
118 | case SuffixPattern: { |
119 | if (fileNameLength + 1 < patternLength) |
120 | return false; |
121 | |
122 | const QChar *c1 = m_pattern.unicode() + patternLength - 1; |
123 | const QChar *c2 = fileName.unicode() + fileNameLength - 1; |
124 | int cnt = 1; |
125 | while (cnt < patternLength && *c1-- == *c2--) |
126 | ++cnt; |
127 | return cnt == patternLength; |
128 | } |
129 | case PrefixPattern: { |
130 | if (fileNameLength + 1 < patternLength) |
131 | return false; |
132 | |
133 | const QChar *c1 = m_pattern.unicode(); |
134 | const QChar *c2 = fileName.unicode(); |
135 | int cnt = 1; |
136 | while (cnt < patternLength && *c1++ == *c2++) |
137 | ++cnt; |
138 | return cnt == patternLength; |
139 | } |
140 | case LiteralPattern: |
141 | return (m_pattern == fileName); |
142 | case VdrPattern: // "[0-9][0-9][0-9].vdr" case |
143 | return fileNameLength == 7 |
144 | && fileName.at(i: 0).isDigit() && fileName.at(i: 1).isDigit() && fileName.at(i: 2).isDigit() |
145 | && QStringView{fileName}.mid(pos: 3, n: 4) == ".vdr"_L1 ; |
146 | case AnimPattern: { // "*.anim[1-9j]" case |
147 | if (fileNameLength < 6) |
148 | return false; |
149 | const QChar lastChar = fileName.at(i: fileNameLength - 1); |
150 | const bool lastCharOK = (lastChar.isDigit() && lastChar != u'0') |
151 | || lastChar == u'j'; |
152 | return lastCharOK && QStringView{fileName}.mid(pos: fileNameLength - 6, n: 5) == ".anim"_L1 ; |
153 | } |
154 | case OtherPattern: |
155 | // Other fallback patterns: slow but correct method |
156 | #if QT_CONFIG(regularexpression) |
157 | auto rx = QRegularExpression::fromWildcard(pattern: m_pattern); |
158 | return rx.match(subject: fileName).hasMatch(); |
159 | #else |
160 | return false; |
161 | #endif |
162 | } |
163 | return false; |
164 | } |
165 | |
166 | static bool isSimplePattern(const QString &pattern) |
167 | { |
168 | // starts with "*.", has no other '*' |
169 | return pattern.lastIndexOf(c: u'*') == 0 |
170 | && pattern.size() > 1 |
171 | && pattern.at(i: 1) == u'.' // (other dots are OK, like *.tar.bz2) |
172 | // and contains no other special character |
173 | && !pattern.contains(c: u'?') |
174 | && !pattern.contains(c: u'[') |
175 | ; |
176 | } |
177 | |
178 | static bool isFastPattern(const QString &pattern) |
179 | { |
180 | // starts with "*.", has no other '*' and no other '.' |
181 | return pattern.lastIndexOf(c: u'*') == 0 |
182 | && pattern.lastIndexOf(c: u'.') == 1 |
183 | // and contains no other special character |
184 | && !pattern.contains(c: u'?') |
185 | && !pattern.contains(c: u'[') |
186 | ; |
187 | } |
188 | |
189 | void QMimeAllGlobPatterns::addGlob(const QMimeGlobPattern &glob) |
190 | { |
191 | const QString &pattern = glob.pattern(); |
192 | Q_ASSERT(!pattern.isEmpty()); |
193 | |
194 | // Store each patterns into either m_fastPatternDict (*.txt, *.html etc. with default weight 50) |
195 | // or for the rest, like core.*, *.tar.bz2, *~, into highWeightPatternOffset (>50) |
196 | // or lowWeightPatternOffset (<=50) |
197 | |
198 | if (glob.weight() == 50 && isFastPattern(pattern) && !glob.isCaseSensitive()) { |
199 | // The bulk of the patterns is *.foo with weight 50 --> those go into the fast patterns hash. |
200 | const QString extension = pattern.mid(position: 2).toLower(); |
201 | QStringList &patterns = m_fastPatterns[extension]; // find or create |
202 | if (!patterns.contains(str: glob.mimeType())) |
203 | patterns.append(t: glob.mimeType()); |
204 | } else { |
205 | if (glob.weight() > 50) { |
206 | if (!m_highWeightGlobs.hasPattern(mimeType: glob.mimeType(), pattern: glob.pattern())) |
207 | m_highWeightGlobs.append(t: glob); |
208 | } else { |
209 | if (!m_lowWeightGlobs.hasPattern(mimeType: glob.mimeType(), pattern: glob.pattern())) |
210 | m_lowWeightGlobs.append(t: glob); |
211 | } |
212 | } |
213 | } |
214 | |
215 | void QMimeAllGlobPatterns::removeMimeType(const QString &mimeType) |
216 | { |
217 | for (auto &x : m_fastPatterns) |
218 | x.removeAll(t: mimeType); |
219 | m_highWeightGlobs.removeMimeType(mimeType); |
220 | m_lowWeightGlobs.removeMimeType(mimeType); |
221 | } |
222 | |
223 | void QMimeGlobPatternList::match(QMimeGlobMatchResult &result, |
224 | const QString &fileName) const |
225 | { |
226 | for (const QMimeGlobPattern &glob : *this) { |
227 | if (glob.matchFileName(inputFileName: fileName)) { |
228 | const QString pattern = glob.pattern(); |
229 | const qsizetype suffixLen = isSimplePattern(pattern) ? pattern.size() - strlen(s: "*." ) : 0; |
230 | result.addMatch(mimeType: glob.mimeType(), weight: glob.weight(), pattern, knownSuffixLength: suffixLen); |
231 | } |
232 | } |
233 | } |
234 | |
235 | void QMimeAllGlobPatterns::matchingGlobs(const QString &fileName, QMimeGlobMatchResult &result) const |
236 | { |
237 | // First try the high weight matches (>50), if any. |
238 | m_highWeightGlobs.match(result, fileName); |
239 | |
240 | // Now use the "fast patterns" dict, for simple *.foo patterns with weight 50 |
241 | // (which is most of them, so this optimization is definitely worth it) |
242 | const qsizetype lastDot = fileName.lastIndexOf(c: u'.'); |
243 | if (lastDot != -1) { // if no '.', skip the extension lookup |
244 | const qsizetype ext_len = fileName.size() - lastDot - 1; |
245 | const QString simpleExtension = fileName.right(n: ext_len).toLower(); |
246 | // (toLower because fast patterns are always case-insensitive and saved as lowercase) |
247 | |
248 | const QStringList matchingMimeTypes = m_fastPatterns.value(key: simpleExtension); |
249 | const QString simplePattern = "*."_L1 + simpleExtension; |
250 | for (const QString &mime : matchingMimeTypes) |
251 | result.addMatch(mimeType: mime, weight: 50, pattern: simplePattern, knownSuffixLength: simpleExtension.size()); |
252 | // Can't return yet; *.tar.bz2 has to win over *.bz2, so we need the low-weight mimetypes anyway, |
253 | // at least those with weight 50. |
254 | } |
255 | |
256 | // Finally, try the low weight matches (<=50) |
257 | m_lowWeightGlobs.match(result, fileName); |
258 | } |
259 | |
260 | void QMimeAllGlobPatterns::clear() |
261 | { |
262 | m_fastPatterns.clear(); |
263 | m_highWeightGlobs.clear(); |
264 | m_lowWeightGlobs.clear(); |
265 | } |
266 | |
267 | QT_END_NAMESPACE |
268 | |