1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include "qbytearraymatcher.h"
5
6#include <limits.h>
7
8QT_BEGIN_NAMESPACE
9
10static inline void bm_init_skiptable(const uchar *cc, qsizetype len, uchar *skiptable)
11{
12 int l = int(qMin(a: len, b: qsizetype(255)));
13 memset(s: skiptable, c: l, n: 256*sizeof(uchar));
14 cc += len - l;
15 while (l--)
16 skiptable[*cc++] = l;
17}
18
19static inline qsizetype bm_find(const uchar *cc, qsizetype l, qsizetype index, const uchar *puc,
20 qsizetype pl, const uchar *skiptable)
21{
22 if (pl == 0)
23 return index > l ? -1 : index;
24 const qsizetype pl_minus_one = pl - 1;
25
26 const uchar *current = cc + index + pl_minus_one;
27 const uchar *end = cc + l;
28 while (current < end) {
29 qsizetype skip = skiptable[*current];
30 if (!skip) {
31 // possible match
32 while (skip < pl) {
33 if (*(current - skip) != puc[pl_minus_one - skip])
34 break;
35 skip++;
36 }
37 if (skip > pl_minus_one) // we have a match
38 return (current - cc) - skip + 1;
39
40 // in case we don't have a match we are a bit inefficient as we only skip by one
41 // when we have the non matching char in the string.
42 if (skiptable[*(current - skip)] == pl)
43 skip = pl - skip;
44 else
45 skip = 1;
46 }
47 if (current > end - skip)
48 break;
49 current += skip;
50 }
51 return -1; // not found
52}
53
54/*! \class QByteArrayMatcher
55 \inmodule QtCore
56 \brief The QByteArrayMatcher class holds a sequence of bytes that
57 can be quickly matched in a byte array.
58
59 \ingroup tools
60 \ingroup string-processing
61
62 This class is useful when you have a sequence of bytes that you
63 want to repeatedly match against some byte arrays (perhaps in a
64 loop), or when you want to search for the same sequence of bytes
65 multiple times in the same byte array. Using a matcher object and
66 indexIn() is faster than matching a plain QByteArray with
67 QByteArray::indexOf() if repeated matching takes place. This
68 class offers no benefit if you are doing one-off byte array
69 matches.
70
71 Create the QByteArrayMatcher with the QByteArray you want to
72 search for. Then call indexIn() on the QByteArray that you want to
73 search.
74
75 \sa QByteArray, QStringMatcher
76*/
77
78/*!
79 Constructs an empty byte array matcher that won't match anything.
80 Call setPattern() to give it a pattern to match.
81*/
82QByteArrayMatcher::QByteArrayMatcher()
83 : d(nullptr)
84{
85 p.p = nullptr;
86 p.l = 0;
87 memset(s: p.q_skiptable, c: 0, n: sizeof(p.q_skiptable));
88}
89
90/*!
91 Constructs a byte array matcher from \a pattern. \a pattern
92 has the given \a length. Call indexIn() to perform a search.
93
94 \note the data that \a pattern is referencing must remain valid while this
95 object is used.
96*/
97QByteArrayMatcher::QByteArrayMatcher(const char *pattern, qsizetype length) : d(nullptr)
98{
99 p.p = reinterpret_cast<const uchar *>(pattern);
100 if (length < 0)
101 p.l = qstrlen(str: pattern);
102 else
103 p.l = length;
104 bm_init_skiptable(cc: p.p, len: p.l, skiptable: p.q_skiptable);
105}
106
107/*!
108 Constructs a byte array matcher that will search for \a pattern.
109 Call indexIn() to perform a search.
110*/
111QByteArrayMatcher::QByteArrayMatcher(const QByteArray &pattern)
112 : d(nullptr), q_pattern(pattern)
113{
114 p.p = reinterpret_cast<const uchar *>(pattern.constData());
115 p.l = pattern.size();
116 bm_init_skiptable(cc: p.p, len: p.l, skiptable: p.q_skiptable);
117}
118
119/*!
120 \fn QByteArrayMatcher::QByteArrayMatcher(QByteArrayView pattern)
121 \since 6.3
122 \overload
123
124 Constructs a byte array matcher that will search for \a pattern.
125 Call indexIn() to perform a search.
126
127 \note the data that \a pattern is referencing must remain valid while this
128 object is used.
129*/
130
131/*!
132 Copies the \a other byte array matcher to this byte array matcher.
133*/
134QByteArrayMatcher::QByteArrayMatcher(const QByteArrayMatcher &other)
135 : d(nullptr)
136{
137 operator=(other);
138}
139
140/*!
141 Destroys the byte array matcher.
142*/
143QByteArrayMatcher::~QByteArrayMatcher()
144{
145 Q_UNUSED(d);
146}
147
148/*!
149 Assigns the \a other byte array matcher to this byte array matcher.
150*/
151QByteArrayMatcher &QByteArrayMatcher::operator=(const QByteArrayMatcher &other)
152{
153 q_pattern = other.q_pattern;
154 memcpy(dest: &p, src: &other.p, n: sizeof(p));
155 return *this;
156}
157
158/*!
159 Sets the byte array that this byte array matcher will search for
160 to \a pattern.
161
162 \sa pattern(), indexIn()
163*/
164void QByteArrayMatcher::setPattern(const QByteArray &pattern)
165{
166 q_pattern = pattern;
167 p.p = reinterpret_cast<const uchar *>(pattern.constData());
168 p.l = pattern.size();
169 bm_init_skiptable(cc: p.p, len: p.l, skiptable: p.q_skiptable);
170}
171
172/*!
173 Searches the char string \a str, which has length \a len, from
174 byte position \a from (default 0, i.e. from the first byte), for
175 the byte array pattern() that was set in the constructor or in the
176 most recent call to setPattern(). Returns the position where the
177 pattern() matched in \a str, or -1 if no match was found.
178*/
179qsizetype QByteArrayMatcher::indexIn(const char *str, qsizetype len, qsizetype from) const
180{
181 if (from < 0)
182 from = 0;
183 return bm_find(cc: reinterpret_cast<const uchar *>(str), l: len, index: from,
184 puc: p.p, pl: p.l, skiptable: p.q_skiptable);
185}
186
187/*!
188 \fn qsizetype QByteArrayMatcher::indexIn(QByteArrayView data, qsizetype from) const
189 \since 6.3
190 \overload
191
192 Searches the byte array \a data, from byte position \a from (default
193 0, i.e. from the first byte), for the byte array pattern() that
194 was set in the constructor or in the most recent call to
195 setPattern(). Returns the position where the pattern() matched in
196 \a data, or -1 if no match was found.
197*/
198qsizetype QByteArrayMatcher::indexIn(QByteArrayView data, qsizetype from) const
199{
200 if (from < 0)
201 from = 0;
202 return bm_find(cc: reinterpret_cast<const uchar *>(data.data()), l: data.size(), index: from,
203 puc: p.p, pl: p.l, skiptable: p.q_skiptable);
204}
205
206/*!
207 \fn QByteArray QByteArrayMatcher::pattern() const
208
209 Returns the byte array pattern that this byte array matcher will
210 search for.
211
212 \sa setPattern()
213*/
214
215
216static qsizetype findChar(const char *str, qsizetype len, char ch, qsizetype from)
217{
218 const uchar *s = (const uchar *)str;
219 uchar c = (uchar)ch;
220 if (from < 0)
221 from = qMax(a: from + len, b: qsizetype(0));
222 if (from < len) {
223 const uchar *n = s + from - 1;
224 const uchar *e = s + len;
225 while (++n != e)
226 if (*n == c)
227 return n - s;
228 }
229 return -1;
230}
231
232/*!
233 \internal
234 */
235static qsizetype qFindByteArrayBoyerMoore(
236 const char *haystack, qsizetype haystackLen, qsizetype haystackOffset,
237 const char *needle, qsizetype needleLen)
238{
239 uchar skiptable[256];
240 bm_init_skiptable(cc: (const uchar *)needle, len: needleLen, skiptable);
241 if (haystackOffset < 0)
242 haystackOffset = 0;
243 return bm_find(cc: (const uchar *)haystack, l: haystackLen, index: haystackOffset,
244 puc: (const uchar *)needle, pl: needleLen, skiptable);
245}
246
247#define REHASH(a) \
248 if (sl_minus_1 < sizeof(std::size_t) * CHAR_BIT) \
249 hashHaystack -= std::size_t(a) << sl_minus_1; \
250 hashHaystack <<= 1
251
252/*!
253 \internal
254 */
255qsizetype qFindByteArray(
256 const char *haystack0, qsizetype haystackLen, qsizetype from,
257 const char *needle, qsizetype needleLen)
258{
259 const auto l = haystackLen;
260 const auto sl = needleLen;
261 if (from < 0)
262 from += l;
263 if (std::size_t(sl + from) > std::size_t(l))
264 return -1;
265 if (!sl)
266 return from;
267 if (!l)
268 return -1;
269
270 if (sl == 1)
271 return findChar(str: haystack0, len: haystackLen, ch: needle[0], from);
272
273 /*
274 We use the Boyer-Moore algorithm in cases where the overhead
275 for the skip table should pay off, otherwise we use a simple
276 hash function.
277 */
278 if (l > 500 && sl > 5)
279 return qFindByteArrayBoyerMoore(haystack: haystack0, haystackLen, haystackOffset: from,
280 needle, needleLen);
281
282 /*
283 We use some hashing for efficiency's sake. Instead of
284 comparing strings, we compare the hash value of str with that
285 of a part of this QString. Only if that matches, we call memcmp().
286 */
287 const char *haystack = haystack0 + from;
288 const char *end = haystack0 + (l - sl);
289 const auto sl_minus_1 = std::size_t(sl - 1);
290 std::size_t hashNeedle = 0, hashHaystack = 0;
291 qsizetype idx;
292 for (idx = 0; idx < sl; ++idx) {
293 hashNeedle = ((hashNeedle<<1) + needle[idx]);
294 hashHaystack = ((hashHaystack<<1) + haystack[idx]);
295 }
296 hashHaystack -= *(haystack + sl_minus_1);
297
298 while (haystack <= end) {
299 hashHaystack += *(haystack + sl_minus_1);
300 if (hashHaystack == hashNeedle && *needle == *haystack
301 && memcmp(s1: needle, s2: haystack, n: sl) == 0)
302 return haystack - haystack0;
303
304 REHASH(*haystack);
305 ++haystack;
306 }
307 return -1;
308}
309
310/*!
311 \class QStaticByteArrayMatcherBase
312 \since 5.9
313 \internal
314 \brief Non-template base class of QStaticByteArrayMatcher.
315*/
316
317/*!
318 \class QStaticByteArrayMatcher
319 \since 5.9
320 \inmodule QtCore
321 \brief The QStaticByteArrayMatcher class is a compile-time version of QByteArrayMatcher.
322
323 \ingroup tools
324 \ingroup string-processing
325
326 This class is useful when you have a sequence of bytes that you
327 want to repeatedly match against some byte arrays (perhaps in a
328 loop), or when you want to search for the same sequence of bytes
329 multiple times in the same byte array. Using a matcher object and
330 indexIn() is faster than matching a plain QByteArray with
331 QByteArray::indexOf(), in particular if repeated matching takes place.
332
333 Unlike QByteArrayMatcher, this class calculates the internal
334 representation at \e{compile-time}, so it can
335 even benefit if you are doing one-off byte array matches.
336
337 Create the QStaticByteArrayMatcher by calling qMakeStaticByteArrayMatcher(),
338 passing it the C string literal you want to search for. Store the return
339 value of that function in a \c{static const auto} variable, so you don't need
340 to pass the \c{N} template parameter explicitly:
341
342 \snippet code/src_corelib_text_qbytearraymatcher.cpp 0
343
344 Then call indexIn() on the QByteArray in which you want to search, just like
345 with QByteArrayMatcher.
346
347 Since this class is designed to do all the up-front calculations at compile-time,
348 it does not offer a setPattern() method.
349
350 \sa QByteArrayMatcher, QStringMatcher
351*/
352
353/*!
354 \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const char *haystack, qsizetype hlen, qsizetype from = 0) const
355
356 Searches the char string \a haystack, which has length \a hlen, from
357 byte position \a from (default 0, i.e. from the first byte), for
358 the byte array pattern() that was set in the constructor.
359
360 Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
361*/
362
363/*!
364 \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, qsizetype from = 0) const
365
366 Searches the char string \a haystack, from byte position \a from
367 (default 0, i.e. from the first byte), for the byte array pattern()
368 that was set in the constructor.
369
370 Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
371*/
372
373/*!
374 \fn template <size_t N> QByteArray QStaticByteArrayMatcher<N>::pattern() const
375
376 Returns the byte array pattern that this byte array matcher will
377 search for.
378
379 \sa QByteArrayMatcher::setPattern()
380*/
381
382/*!
383 \internal
384*/
385qsizetype QStaticByteArrayMatcherBase::indexOfIn(const char *needle, size_t nlen, const char *haystack, qsizetype hlen, qsizetype from) const noexcept
386{
387 if (from < 0)
388 from = 0;
389 return bm_find(cc: reinterpret_cast<const uchar *>(haystack), l: hlen, index: from,
390 puc: reinterpret_cast<const uchar *>(needle), pl: nlen, skiptable: m_skiptable.data);
391}
392
393/*!
394 \fn template <size_t N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N])
395 \internal
396*/
397
398/*!
399 \fn template <size_t N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N])
400 \since 5.9
401 \relates QStaticByteArrayMatcher
402
403 Return a QStaticByteArrayMatcher with the correct \c{N} determined
404 automatically from the \a pattern passed.
405
406 To take full advantage of this function, assign the result to an
407 \c{auto} variable:
408
409 \snippet code/src_corelib_text_qbytearraymatcher.cpp 1
410*/
411
412
413QT_END_NAMESPACE
414
415#undef REHASH
416

source code of qtbase/src/corelib/text/qbytearraymatcher.cpp