1// Copyright (C) 2020 The Qt Company Ltd.
2// Copyright (C) 2019 Mail.ru Group.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4
5#include "qstringmatcher.h"
6
7QT_BEGIN_NAMESPACE
8
9static constexpr qsizetype FoldBufferCapacity = 256;
10
11static void bm_init_skiptable(QStringView needle, uchar *skiptable, Qt::CaseSensitivity cs)
12{
13 const char16_t *uc = needle.utf16();
14 const qsizetype len =
15 cs == Qt::CaseSensitive ? needle.size() : qMin(a: needle.size(), b: FoldBufferCapacity);
16 int l = qMin(a: int(len), b: 255);
17 memset(s: skiptable, c: l, n: 256 * sizeof(uchar));
18 uc += len - l;
19 if (cs == Qt::CaseSensitive) {
20 while (l--) {
21 skiptable[*uc & 0xff] = l;
22 ++uc;
23 }
24 } else {
25 const char16_t *start = uc;
26 while (l--) {
27 skiptable[foldCase(ch: uc, start) & 0xff] = l;
28 ++uc;
29 }
30 }
31}
32
33static inline qsizetype bm_find(QStringView haystack, qsizetype index, QStringView needle,
34 const uchar *skiptable, Qt::CaseSensitivity cs)
35{
36 const char16_t *uc = haystack.utf16();
37 const qsizetype l = haystack.size();
38 const char16_t *puc = needle.utf16();
39 const qsizetype pl = needle.size();
40
41 if (pl == 0)
42 return index > l ? -1 : index;
43
44 if (cs == Qt::CaseSensitive) {
45 const qsizetype pl_minus_one = pl - 1;
46 const char16_t *current = uc + index + pl_minus_one;
47 const char16_t *end = uc + l;
48
49 while (current < end) {
50 qsizetype skip = skiptable[*current & 0xff];
51 if (!skip) {
52 // possible match
53 while (skip < pl) {
54 if (*(current - skip) != puc[pl_minus_one-skip])
55 break;
56 ++skip;
57 }
58 if (skip > pl_minus_one) // we have a match
59 return (current - uc) - pl_minus_one;
60
61 // in case we don't have a match we are a bit inefficient as we only skip by one
62 // when we have the non matching char in the string.
63 if (skiptable[*(current - skip) & 0xff] == pl)
64 skip = pl - skip;
65 else
66 skip = 1;
67 }
68 if (current > end - skip)
69 break;
70 current += skip;
71 }
72 } else {
73 char16_t foldBuffer[FoldBufferCapacity];
74 const qsizetype foldBufferLength = qMin(a: FoldBufferCapacity, b: pl);
75 const char16_t *start = puc;
76 for (qsizetype i = 0; i < foldBufferLength; ++i)
77 foldBuffer[i] = foldCase(ch: &puc[i], start);
78 QStringView restNeedle = needle.sliced(pos: foldBufferLength);
79 const qsizetype foldBufferEnd = foldBufferLength - 1;
80 const char16_t *current = uc + index + foldBufferEnd;
81 const char16_t *end = uc + l;
82
83 while (current < end) {
84 qsizetype skip = skiptable[foldCase(ch: current, start: uc) & 0xff];
85 if (!skip) {
86 // possible match
87 while (skip < foldBufferLength) {
88 if (foldCase(ch: current - skip, start: uc) != foldBuffer[foldBufferEnd - skip])
89 break;
90 ++skip;
91 }
92 if (skip > foldBufferEnd) { // Matching foldBuffer
93 qsizetype candidatePos = (current - uc) - foldBufferEnd;
94 QStringView restHaystack =
95 haystack.sliced(pos: qMin(a: haystack.size(), b: candidatePos + foldBufferLength));
96 if (restNeedle.size() == 0
97 || restHaystack.startsWith(
98 s: restNeedle, cs: Qt::CaseInsensitive)) // Check the rest of the string
99 return candidatePos;
100 }
101 // in case we don't have a match we are a bit inefficient as we only skip by one
102 // when we have the non matching char in the string.
103 if (skiptable[foldCase(ch: current - skip, start: uc) & 0xff] == foldBufferLength)
104 skip = foldBufferLength - skip;
105 else
106 skip = 1;
107 }
108 if (current > end - skip)
109 break;
110 current += skip;
111 }
112 }
113 return -1; // not found
114}
115
116void QStringMatcher::updateSkipTable()
117{
118 bm_init_skiptable(needle: q_sv, skiptable: q_skiptable, cs: q_cs);
119}
120
121/*!
122 \class QStringMatcher
123 \inmodule QtCore
124 \brief The QStringMatcher class holds a sequence of characters that
125 can be quickly matched in a Unicode string.
126
127 \ingroup tools
128 \ingroup string-processing
129
130 This class is useful when you have a sequence of \l{QChar}s that
131 you want to repeatedly match against some strings (perhaps in a
132 loop), or when you want to search for the same sequence of
133 characters multiple times in the same string. Using a matcher
134 object and indexIn() is faster than matching a plain QString with
135 QString::indexOf() if repeated matching takes place. This class
136 offers no benefit if you are doing one-off string matches.
137
138 Create the QStringMatcher with the QString you want to search
139 for. Then call indexIn() on the QString that you want to search.
140
141 \sa QString, QByteArrayMatcher, QRegularExpression
142*/
143
144/*! \fn QStringMatcher::QStringMatcher()
145
146 Constructs an empty string matcher that won't match anything.
147 Call setPattern() to give it a pattern to match.
148*/
149
150/*!
151 Constructs a string matcher that will search for \a pattern, with
152 case sensitivity \a cs.
153
154 Call indexIn() to perform a search.
155*/
156QStringMatcher::QStringMatcher(const QString &pattern, Qt::CaseSensitivity cs)
157 : d_ptr(nullptr), q_cs(cs), q_pattern(pattern)
158{
159 q_sv = q_pattern;
160 updateSkipTable();
161}
162
163/*!
164 \fn QStringMatcher::QStringMatcher(const QChar *uc, qsizetype length, Qt::CaseSensitivity cs)
165 \since 4.5
166
167 Constructs a string matcher that will search for the pattern referred to
168 by \a uc with the given \a length and case sensitivity specified by \a cs.
169*/
170
171/*!
172 \fn QStringMatcher::QStringMatcher(QStringView pattern, Qt::CaseSensitivity cs = Qt::CaseSensitive)
173 \since 5.14
174
175 Constructs a string matcher that will search for \a pattern, with
176 case sensitivity \a cs.
177
178 Call indexIn() to perform a search.
179*/
180QStringMatcher::QStringMatcher(QStringView str, Qt::CaseSensitivity cs)
181 : d_ptr(nullptr), q_cs(cs), q_sv(str)
182{
183 updateSkipTable();
184}
185/*!
186 Copies the \a other string matcher to this string matcher.
187*/
188QStringMatcher::QStringMatcher(const QStringMatcher &other)
189 : d_ptr(nullptr)
190{
191 operator=(other);
192}
193
194/*!
195 Destroys the string matcher.
196*/
197QStringMatcher::~QStringMatcher()
198{
199 Q_UNUSED(d_ptr);
200}
201
202/*!
203 Assigns the \a other string matcher to this string matcher.
204*/
205QStringMatcher &QStringMatcher::operator=(const QStringMatcher &other)
206{
207 if (this != &other) {
208 q_pattern = other.q_pattern;
209 q_cs = other.q_cs;
210 q_sv = other.q_sv;
211 memcpy(dest: q_skiptable, src: other.q_skiptable, n: sizeof(q_skiptable));
212 }
213 return *this;
214}
215
216/*!
217 Sets the string that this string matcher will search for to \a
218 pattern.
219
220 \sa pattern(), setCaseSensitivity(), indexIn()
221*/
222void QStringMatcher::setPattern(const QString &pattern)
223{
224 q_pattern = pattern;
225 q_sv = q_pattern;
226 updateSkipTable();
227}
228
229/*!
230 \fn QString QStringMatcher::pattern() const
231
232 Returns the string pattern that this string matcher will search
233 for.
234
235 \sa setPattern()
236*/
237
238QString QStringMatcher::pattern() const
239{
240 if (!q_pattern.isEmpty())
241 return q_pattern;
242 return q_sv.toString();
243}
244
245/*!
246 Sets the case sensitivity setting of this string matcher to \a
247 cs.
248
249 \sa caseSensitivity(), setPattern(), indexIn()
250*/
251void QStringMatcher::setCaseSensitivity(Qt::CaseSensitivity cs)
252{
253 if (cs == q_cs)
254 return;
255 q_cs = cs;
256 updateSkipTable();
257}
258
259/*! \fn qsizetype QStringMatcher::indexIn(const QString &str, qsizetype from) const
260
261 Searches the string \a str from character position \a from
262 (default 0, i.e. from the first character), for the string
263 pattern() that was set in the constructor or in the most recent
264 call to setPattern(). Returns the position where the pattern()
265 matched in \a str, or -1 if no match was found.
266
267 \sa setPattern(), setCaseSensitivity()
268*/
269
270/*! \fn qsizetype QStringMatcher::indexIn(const QChar *str, qsizetype length, qsizetype from) const
271 \since 4.5
272
273 Searches the string starting at \a str (of length \a length) from
274 character position \a from (default 0, i.e. from the first
275 character), for the string pattern() that was set in the
276 constructor or in the most recent call to setPattern(). Returns
277 the position where the pattern() matched in \a str, or -1 if no
278 match was found.
279
280 \sa setPattern(), setCaseSensitivity()
281*/
282
283/*!
284 \since 5.14
285
286 Searches the string \a str from character position \a from
287 (default 0, i.e. from the first character), for the string
288 pattern() that was set in the constructor or in the most recent
289 call to setPattern(). Returns the position where the pattern()
290 matched in \a str, or -1 if no match was found.
291
292 \sa setPattern(), setCaseSensitivity()
293*/
294qsizetype QStringMatcher::indexIn(QStringView str, qsizetype from) const
295{
296 if (from < 0)
297 from = 0;
298 return bm_find(haystack: str, index: from, needle: q_sv, skiptable: q_skiptable, cs: q_cs);
299}
300
301/*!
302 \fn Qt::CaseSensitivity QStringMatcher::caseSensitivity() const
303
304 Returns the case sensitivity setting for this string matcher.
305
306 \sa setCaseSensitivity()
307*/
308
309/*!
310 \internal
311*/
312
313qsizetype qFindStringBoyerMoore(
314 QStringView haystack, qsizetype haystackOffset,
315 QStringView needle, Qt::CaseSensitivity cs)
316{
317 uchar skiptable[256];
318 bm_init_skiptable(needle, skiptable, cs);
319 if (haystackOffset < 0)
320 haystackOffset = 0;
321 return bm_find(haystack, index: haystackOffset, needle, skiptable, cs);
322}
323
324QT_END_NAMESPACE
325

source code of qtbase/src/corelib/text/qstringmatcher.cpp