1/****************************************************************************
2**
3** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
4** Contact: http://www.qt-project.org/legal
5**
6** This file is part of the QtCore module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and Digia. For licensing terms and
14** conditions see http://qt.digia.com/licensing. For further information
15** use the contact form at http://qt.digia.com/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 2.1 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 2.1 requirements
23** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24**
25** In addition, as a special exception, Digia gives you certain additional
26** rights. These rights are described in the Digia Qt LGPL Exception
27** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28**
29** GNU General Public License Usage
30** Alternatively, this file may be used under the terms of the GNU
31** General Public License version 3.0 as published by the Free Software
32** Foundation and appearing in the file LICENSE.GPL included in the
33** packaging of this file. Please review the following information to
34** ensure the GNU General Public License version 3.0 requirements will be
35** met: http://www.gnu.org/copyleft/gpl.html.
36**
37**
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "qstringmatcher.h"
43
44QT_BEGIN_NAMESPACE
45
46static void bm_init_skiptable(const ushort *uc, int len, uchar *skiptable, Qt::CaseSensitivity cs)
47{
48 int l = qMin(len, 255);
49 memset(skiptable, l, 256*sizeof(uchar));
50 uc += len - l;
51 if (cs == Qt::CaseSensitive) {
52 while (l--) {
53 skiptable[*uc & 0xff] = l;
54 uc++;
55 }
56 } else {
57 const ushort *start = uc;
58 while (l--) {
59 skiptable[foldCase(uc, start) & 0xff] = l;
60 uc++;
61 }
62 }
63}
64
65static inline int bm_find(const ushort *uc, uint l, int index, const ushort *puc, uint pl,
66 const uchar *skiptable, Qt::CaseSensitivity cs)
67{
68 if (pl == 0)
69 return index > (int)l ? -1 : index;
70 const uint pl_minus_one = pl - 1;
71
72 register const ushort *current = uc + index + pl_minus_one;
73 const ushort *end = uc + l;
74 if (cs == Qt::CaseSensitive) {
75 while (current < end) {
76 uint skip = skiptable[*current & 0xff];
77 if (!skip) {
78 // possible match
79 while (skip < pl) {
80 if (*(current - skip) != puc[pl_minus_one-skip])
81 break;
82 skip++;
83 }
84 if (skip > pl_minus_one) // we have a match
85 return (current - uc) - pl_minus_one;
86
87 // in case we don't have a match we are a bit inefficient as we only skip by one
88 // when we have the non matching char in the string.
89 if (skiptable[*(current - skip) & 0xff] == pl)
90 skip = pl - skip;
91 else
92 skip = 1;
93 }
94 if (current > end - skip)
95 break;
96 current += skip;
97 }
98 } else {
99 while (current < end) {
100 uint skip = skiptable[foldCase(current, uc) & 0xff];
101 if (!skip) {
102 // possible match
103 while (skip < pl) {
104 if (foldCase(current - skip, uc) != foldCase(puc + pl_minus_one - skip, puc))
105 break;
106 skip++;
107 }
108 if (skip > pl_minus_one) // we have a match
109 return (current - uc) - pl_minus_one;
110 // in case we don't have a match we are a bit inefficient as we only skip by one
111 // when we have the non matching char in the string.
112 if (skiptable[foldCase(current - skip, uc) & 0xff] == pl)
113 skip = pl - skip;
114 else
115 skip = 1;
116 }
117 if (current > end - skip)
118 break;
119 current += skip;
120 }
121 }
122 return -1; // not found
123}
124
125/*!
126 \class QStringMatcher
127 \brief The QStringMatcher class holds a sequence of characters that
128 can be quickly matched in a Unicode string.
129
130 \ingroup tools
131 \ingroup string-processing
132
133 This class is useful when you have a sequence of \l{QChar}s that
134 you want to repeatedly match against some strings (perhaps in a
135 loop), or when you want to search for the same sequence of
136 characters multiple times in the same string. Using a matcher
137 object and indexIn() is faster than matching a plain QString with
138 QString::indexOf() if repeated matching takes place. This class
139 offers no benefit if you are doing one-off string matches.
140
141 Create the QStringMatcher with the QString you want to search
142 for. Then call indexIn() on the QString that you want to search.
143
144 \sa QString, QByteArrayMatcher, QRegExp
145*/
146
147/*!
148 Constructs an empty string matcher that won't match anything.
149 Call setPattern() to give it a pattern to match.
150*/
151QStringMatcher::QStringMatcher()
152 : d_ptr(0), q_cs(Qt::CaseSensitive)
153{
154 qMemSet(q_data, 0, sizeof(q_data));
155}
156
157/*!
158 Constructs a string matcher that will search for \a pattern, with
159 case sensitivity \a cs.
160
161 Call indexIn() to perform a search.
162*/
163QStringMatcher::QStringMatcher(const QString &pattern, Qt::CaseSensitivity cs)
164 : d_ptr(0), q_pattern(pattern), q_cs(cs)
165{
166 p.uc = pattern.unicode();
167 p.len = pattern.size();
168 bm_init_skiptable((const ushort *)p.uc, p.len, p.q_skiptable, cs);
169}
170
171/*!
172 \fn QStringMatcher::QStringMatcher(const QChar *uc, int length, Qt::CaseSensitivity cs)
173 \since 4.5
174
175 Constructs a string matcher that will search for the pattern referred to
176 by \a uc with the given \a length and case sensitivity specified by \a cs.
177*/
178QStringMatcher::QStringMatcher(const QChar *uc, int len, Qt::CaseSensitivity cs)
179 : d_ptr(0), q_cs(cs)
180{
181 p.uc = uc;
182 p.len = len;
183 bm_init_skiptable((const ushort *)p.uc, len, p.q_skiptable, cs);
184}
185
186/*!
187 Copies the \a other string matcher to this string matcher.
188*/
189QStringMatcher::QStringMatcher(const QStringMatcher &other)
190 : d_ptr(0)
191{
192 operator=(other);
193}
194
195/*!
196 Destroys the string matcher.
197*/
198QStringMatcher::~QStringMatcher()
199{
200}
201
202/*!
203 Assigns the \a other string matcher to this string matcher.
204*/
205QStringMatcher &QStringMatcher::operator=(const QStringMatcher &other)
206{
207 if (this != &other) {
208 q_pattern = other.q_pattern;
209 q_cs = other.q_cs;
210 memcpy(q_data, other.q_data, sizeof(q_data));
211 }
212 return *this;
213}
214
215/*!
216 Sets the string that this string matcher will search for to \a
217 pattern.
218
219 \sa pattern(), setCaseSensitivity(), indexIn()
220*/
221void QStringMatcher::setPattern(const QString &pattern)
222{
223 q_pattern = pattern;
224 p.uc = pattern.unicode();
225 p.len = pattern.size();
226 bm_init_skiptable((const ushort *)pattern.unicode(), pattern.size(), p.q_skiptable, q_cs);
227}
228
229/*!
230 \fn QString QStringMatcher::pattern() const
231
232 Returns the string pattern that this string matcher will search
233 for.
234
235 \sa setPattern()
236*/
237
238QString QStringMatcher::pattern() const
239{
240 if (!q_pattern.isEmpty())
241 return q_pattern;
242 return QString(p.uc, p.len);
243}
244
245/*!
246 Sets the case sensitivity setting of this string matcher to \a
247 cs.
248
249 \sa caseSensitivity(), setPattern(), indexIn()
250*/
251void QStringMatcher::setCaseSensitivity(Qt::CaseSensitivity cs)
252{
253 if (cs == q_cs)
254 return;
255 bm_init_skiptable((const ushort *)q_pattern.unicode(), q_pattern.size(), p.q_skiptable, cs);
256 q_cs = cs;
257}
258
259/*!
260 Searches the string \a str from character position \a from
261 (default 0, i.e. from the first character), for the string
262 pattern() that was set in the constructor or in the most recent
263 call to setPattern(). Returns the position where the pattern()
264 matched in \a str, or -1 if no match was found.
265
266 \sa setPattern(), setCaseSensitivity()
267*/
268int QStringMatcher::indexIn(const QString &str, int from) const
269{
270 if (from < 0)
271 from = 0;
272 return bm_find((const ushort *)str.unicode(), str.size(), from,
273 (const ushort *)p.uc, p.len,
274 p.q_skiptable, q_cs);
275}
276
277/*!
278 \since 4.5
279
280 Searches the string starting at \a str (of length \a length) from
281 character position \a from (default 0, i.e. from the first
282 character), for the string pattern() that was set in the
283 constructor or in the most recent call to setPattern(). Returns
284 the position where the pattern() matched in \a str, or -1 if no
285 match was found.
286
287 \sa setPattern(), setCaseSensitivity()
288*/
289int QStringMatcher::indexIn(const QChar *str, int length, int from) const
290{
291 if (from < 0)
292 from = 0;
293 return bm_find((const ushort *)str, length, from,
294 (const ushort *)p.uc, p.len,
295 p.q_skiptable, q_cs);
296}
297
298/*!
299 \fn Qt::CaseSensitivity QStringMatcher::caseSensitivity() const
300
301 Returns the case sensitivity setting for this string matcher.
302
303 \sa setCaseSensitivity()
304*/
305
306/*!
307 \internal
308*/
309
310int qFindStringBoyerMoore(
311 const QChar *haystack, int haystackLen, int haystackOffset,
312 const QChar *needle, int needleLen, Qt::CaseSensitivity cs)
313{
314 uchar skiptable[256];
315 bm_init_skiptable((const ushort *)needle, needleLen, skiptable, cs);
316 if (haystackOffset < 0)
317 haystackOffset = 0;
318 return bm_find((const ushort *)haystack, haystackLen, haystackOffset,
319 (const ushort *)needle, needleLen, skiptable, cs);
320}
321
322QT_END_NAMESPACE
323