1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies). |
4 | ** Contact: http://www.qt-project.org/legal |
5 | ** |
6 | ** This file is part of the QtCore module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and Digia. For licensing terms and |
14 | ** conditions see http://qt.digia.com/licensing. For further information |
15 | ** use the contact form at http://qt.digia.com/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 2.1 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 2.1 requirements |
23 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
24 | ** |
25 | ** In addition, as a special exception, Digia gives you certain additional |
26 | ** rights. These rights are described in the Digia Qt LGPL Exception |
27 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
28 | ** |
29 | ** GNU General Public License Usage |
30 | ** Alternatively, this file may be used under the terms of the GNU |
31 | ** General Public License version 3.0 as published by the Free Software |
32 | ** Foundation and appearing in the file LICENSE.GPL included in the |
33 | ** packaging of this file. Please review the following information to |
34 | ** ensure the GNU General Public License version 3.0 requirements will be |
35 | ** met: http://www.gnu.org/copyleft/gpl.html. |
36 | ** |
37 | ** |
38 | ** $QT_END_LICENSE$ |
39 | ** |
40 | ****************************************************************************/ |
41 | |
42 | #include "qstringmatcher.h" |
43 | |
44 | QT_BEGIN_NAMESPACE |
45 | |
46 | static void bm_init_skiptable(const ushort *uc, int len, uchar *skiptable, Qt::CaseSensitivity cs) |
47 | { |
48 | int l = qMin(len, 255); |
49 | memset(skiptable, l, 256*sizeof(uchar)); |
50 | uc += len - l; |
51 | if (cs == Qt::CaseSensitive) { |
52 | while (l--) { |
53 | skiptable[*uc & 0xff] = l; |
54 | uc++; |
55 | } |
56 | } else { |
57 | const ushort *start = uc; |
58 | while (l--) { |
59 | skiptable[foldCase(uc, start) & 0xff] = l; |
60 | uc++; |
61 | } |
62 | } |
63 | } |
64 | |
65 | static inline int bm_find(const ushort *uc, uint l, int index, const ushort *puc, uint pl, |
66 | const uchar *skiptable, Qt::CaseSensitivity cs) |
67 | { |
68 | if (pl == 0) |
69 | return index > (int)l ? -1 : index; |
70 | const uint pl_minus_one = pl - 1; |
71 | |
72 | register const ushort *current = uc + index + pl_minus_one; |
73 | const ushort *end = uc + l; |
74 | if (cs == Qt::CaseSensitive) { |
75 | while (current < end) { |
76 | uint skip = skiptable[*current & 0xff]; |
77 | if (!skip) { |
78 | // possible match |
79 | while (skip < pl) { |
80 | if (*(current - skip) != puc[pl_minus_one-skip]) |
81 | break; |
82 | skip++; |
83 | } |
84 | if (skip > pl_minus_one) // we have a match |
85 | return (current - uc) - pl_minus_one; |
86 | |
87 | // in case we don't have a match we are a bit inefficient as we only skip by one |
88 | // when we have the non matching char in the string. |
89 | if (skiptable[*(current - skip) & 0xff] == pl) |
90 | skip = pl - skip; |
91 | else |
92 | skip = 1; |
93 | } |
94 | if (current > end - skip) |
95 | break; |
96 | current += skip; |
97 | } |
98 | } else { |
99 | while (current < end) { |
100 | uint skip = skiptable[foldCase(current, uc) & 0xff]; |
101 | if (!skip) { |
102 | // possible match |
103 | while (skip < pl) { |
104 | if (foldCase(current - skip, uc) != foldCase(puc + pl_minus_one - skip, puc)) |
105 | break; |
106 | skip++; |
107 | } |
108 | if (skip > pl_minus_one) // we have a match |
109 | return (current - uc) - pl_minus_one; |
110 | // in case we don't have a match we are a bit inefficient as we only skip by one |
111 | // when we have the non matching char in the string. |
112 | if (skiptable[foldCase(current - skip, uc) & 0xff] == pl) |
113 | skip = pl - skip; |
114 | else |
115 | skip = 1; |
116 | } |
117 | if (current > end - skip) |
118 | break; |
119 | current += skip; |
120 | } |
121 | } |
122 | return -1; // not found |
123 | } |
124 | |
125 | /*! |
126 | \class QStringMatcher |
127 | \brief The QStringMatcher class holds a sequence of characters that |
128 | can be quickly matched in a Unicode string. |
129 | |
130 | \ingroup tools |
131 | \ingroup string-processing |
132 | |
133 | This class is useful when you have a sequence of \l{QChar}s that |
134 | you want to repeatedly match against some strings (perhaps in a |
135 | loop), or when you want to search for the same sequence of |
136 | characters multiple times in the same string. Using a matcher |
137 | object and indexIn() is faster than matching a plain QString with |
138 | QString::indexOf() if repeated matching takes place. This class |
139 | offers no benefit if you are doing one-off string matches. |
140 | |
141 | Create the QStringMatcher with the QString you want to search |
142 | for. Then call indexIn() on the QString that you want to search. |
143 | |
144 | \sa QString, QByteArrayMatcher, QRegExp |
145 | */ |
146 | |
147 | /*! |
148 | Constructs an empty string matcher that won't match anything. |
149 | Call setPattern() to give it a pattern to match. |
150 | */ |
151 | QStringMatcher::QStringMatcher() |
152 | : d_ptr(0), q_cs(Qt::CaseSensitive) |
153 | { |
154 | qMemSet(q_data, 0, sizeof(q_data)); |
155 | } |
156 | |
157 | /*! |
158 | Constructs a string matcher that will search for \a pattern, with |
159 | case sensitivity \a cs. |
160 | |
161 | Call indexIn() to perform a search. |
162 | */ |
163 | QStringMatcher::QStringMatcher(const QString &pattern, Qt::CaseSensitivity cs) |
164 | : d_ptr(0), q_pattern(pattern), q_cs(cs) |
165 | { |
166 | p.uc = pattern.unicode(); |
167 | p.len = pattern.size(); |
168 | bm_init_skiptable((const ushort *)p.uc, p.len, p.q_skiptable, cs); |
169 | } |
170 | |
171 | /*! |
172 | \fn QStringMatcher::QStringMatcher(const QChar *uc, int length, Qt::CaseSensitivity cs) |
173 | \since 4.5 |
174 | |
175 | Constructs a string matcher that will search for the pattern referred to |
176 | by \a uc with the given \a length and case sensitivity specified by \a cs. |
177 | */ |
178 | QStringMatcher::QStringMatcher(const QChar *uc, int len, Qt::CaseSensitivity cs) |
179 | : d_ptr(0), q_cs(cs) |
180 | { |
181 | p.uc = uc; |
182 | p.len = len; |
183 | bm_init_skiptable((const ushort *)p.uc, len, p.q_skiptable, cs); |
184 | } |
185 | |
186 | /*! |
187 | Copies the \a other string matcher to this string matcher. |
188 | */ |
189 | QStringMatcher::QStringMatcher(const QStringMatcher &other) |
190 | : d_ptr(0) |
191 | { |
192 | operator=(other); |
193 | } |
194 | |
195 | /*! |
196 | Destroys the string matcher. |
197 | */ |
198 | QStringMatcher::~QStringMatcher() |
199 | { |
200 | } |
201 | |
202 | /*! |
203 | Assigns the \a other string matcher to this string matcher. |
204 | */ |
205 | QStringMatcher &QStringMatcher::operator=(const QStringMatcher &other) |
206 | { |
207 | if (this != &other) { |
208 | q_pattern = other.q_pattern; |
209 | q_cs = other.q_cs; |
210 | memcpy(q_data, other.q_data, sizeof(q_data)); |
211 | } |
212 | return *this; |
213 | } |
214 | |
215 | /*! |
216 | Sets the string that this string matcher will search for to \a |
217 | pattern. |
218 | |
219 | \sa pattern(), setCaseSensitivity(), indexIn() |
220 | */ |
221 | void QStringMatcher::setPattern(const QString &pattern) |
222 | { |
223 | q_pattern = pattern; |
224 | p.uc = pattern.unicode(); |
225 | p.len = pattern.size(); |
226 | bm_init_skiptable((const ushort *)pattern.unicode(), pattern.size(), p.q_skiptable, q_cs); |
227 | } |
228 | |
229 | /*! |
230 | \fn QString QStringMatcher::pattern() const |
231 | |
232 | Returns the string pattern that this string matcher will search |
233 | for. |
234 | |
235 | \sa setPattern() |
236 | */ |
237 | |
238 | QString QStringMatcher::pattern() const |
239 | { |
240 | if (!q_pattern.isEmpty()) |
241 | return q_pattern; |
242 | return QString(p.uc, p.len); |
243 | } |
244 | |
245 | /*! |
246 | Sets the case sensitivity setting of this string matcher to \a |
247 | cs. |
248 | |
249 | \sa caseSensitivity(), setPattern(), indexIn() |
250 | */ |
251 | void QStringMatcher::setCaseSensitivity(Qt::CaseSensitivity cs) |
252 | { |
253 | if (cs == q_cs) |
254 | return; |
255 | bm_init_skiptable((const ushort *)q_pattern.unicode(), q_pattern.size(), p.q_skiptable, cs); |
256 | q_cs = cs; |
257 | } |
258 | |
259 | /*! |
260 | Searches the string \a str from character position \a from |
261 | (default 0, i.e. from the first character), for the string |
262 | pattern() that was set in the constructor or in the most recent |
263 | call to setPattern(). Returns the position where the pattern() |
264 | matched in \a str, or -1 if no match was found. |
265 | |
266 | \sa setPattern(), setCaseSensitivity() |
267 | */ |
268 | int QStringMatcher::indexIn(const QString &str, int from) const |
269 | { |
270 | if (from < 0) |
271 | from = 0; |
272 | return bm_find((const ushort *)str.unicode(), str.size(), from, |
273 | (const ushort *)p.uc, p.len, |
274 | p.q_skiptable, q_cs); |
275 | } |
276 | |
277 | /*! |
278 | \since 4.5 |
279 | |
280 | Searches the string starting at \a str (of length \a length) from |
281 | character position \a from (default 0, i.e. from the first |
282 | character), for the string pattern() that was set in the |
283 | constructor or in the most recent call to setPattern(). Returns |
284 | the position where the pattern() matched in \a str, or -1 if no |
285 | match was found. |
286 | |
287 | \sa setPattern(), setCaseSensitivity() |
288 | */ |
289 | int QStringMatcher::indexIn(const QChar *str, int length, int from) const |
290 | { |
291 | if (from < 0) |
292 | from = 0; |
293 | return bm_find((const ushort *)str, length, from, |
294 | (const ushort *)p.uc, p.len, |
295 | p.q_skiptable, q_cs); |
296 | } |
297 | |
298 | /*! |
299 | \fn Qt::CaseSensitivity QStringMatcher::caseSensitivity() const |
300 | |
301 | Returns the case sensitivity setting for this string matcher. |
302 | |
303 | \sa setCaseSensitivity() |
304 | */ |
305 | |
306 | /*! |
307 | \internal |
308 | */ |
309 | |
310 | int qFindStringBoyerMoore( |
311 | const QChar *haystack, int haystackLen, int haystackOffset, |
312 | const QChar *needle, int needleLen, Qt::CaseSensitivity cs) |
313 | { |
314 | uchar skiptable[256]; |
315 | bm_init_skiptable((const ushort *)needle, needleLen, skiptable, cs); |
316 | if (haystackOffset < 0) |
317 | haystackOffset = 0; |
318 | return bm_find((const ushort *)haystack, haystackLen, haystackOffset, |
319 | (const ushort *)needle, needleLen, skiptable, cs); |
320 | } |
321 | |
322 | QT_END_NAMESPACE |
323 | |