1 | // -*- c-basic-offset: 2 -*- |
2 | // krazy:excludeall=doublequote_chars (UStrings aren't QStrings) |
3 | /* |
4 | * This file is part of the KDE libraries |
5 | * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) |
6 | * Copyright (C) 2003 Apple Computer, Inc. |
7 | * |
8 | * This library is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU Lesser General Public |
10 | * License as published by the Free Software Foundation; either |
11 | * version 2 of the License, or (at your option) any later version. |
12 | * |
13 | * This library is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | * Lesser General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU Lesser General Public |
19 | * License along with this library; if not, write to the Free Software |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | * |
22 | */ |
23 | |
24 | #include "regexp_object.h" |
25 | #include <config-kjs.h> |
26 | |
27 | #include "regexp_object.lut.h" |
28 | |
29 | #include <stdio.h> |
30 | #include "value.h" |
31 | #include "object.h" |
32 | #include "types.h" |
33 | #include "nodes.h" |
34 | #include "interpreter.h" |
35 | #include "operations.h" |
36 | #include "internal.h" |
37 | #include "regexp.h" |
38 | #include "error_object.h" |
39 | #include "lookup.h" |
40 | |
41 | using namespace KJS; |
42 | |
43 | // ------------------------------ RegExpPrototype --------------------------- |
44 | |
45 | // ECMA 15.10.5 |
46 | |
47 | const ClassInfo RegExpPrototype::info = {"RegExp" , 0, 0, 0}; |
48 | |
49 | RegExpPrototype::RegExpPrototype(ExecState *exec, |
50 | ObjectPrototype *objProto, |
51 | FunctionPrototype *funcProto) |
52 | : JSObject(objProto) |
53 | { |
54 | static const Identifier* execPropertyName = new Identifier("exec" ); |
55 | static const Identifier* testPropertyName = new Identifier("test" ); |
56 | static const Identifier* compilePropertyName = new Identifier("compile" ); |
57 | |
58 | putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::Exec, 0, *execPropertyName), DontEnum); |
59 | putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::Test, 0, *testPropertyName), DontEnum); |
60 | putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::ToString, 0, exec->propertyNames().toString), DontEnum); |
61 | putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::Compile, 1, *compilePropertyName), DontEnum); |
62 | } |
63 | |
64 | // ------------------------------ RegExpProtoFunc --------------------------- |
65 | |
66 | RegExpProtoFunc::RegExpProtoFunc(ExecState* exec, FunctionPrototype* funcProto, int i, int len, const Identifier& name) |
67 | : InternalFunctionImp(funcProto, name), id(i) |
68 | { |
69 | putDirect(exec->propertyNames().length, len, DontDelete | ReadOnly | DontEnum); |
70 | } |
71 | |
72 | JSValue *RegExpProtoFunc::callAsFunction(ExecState *exec, JSObject *thisObj, const List &args) |
73 | { |
74 | if (!thisObj->inherits(&RegExpImp::info)) { |
75 | if (thisObj->inherits(&RegExpPrototype::info)) { |
76 | switch (id) { |
77 | case ToString: return jsString("//" ); |
78 | } |
79 | } |
80 | |
81 | return throwError(exec, TypeError); |
82 | } |
83 | |
84 | switch (id) { |
85 | case Test: // 15.10.6.2 |
86 | case Exec: |
87 | { |
88 | RegExp *regExp = static_cast<RegExpImp*>(thisObj)->regExp(); |
89 | RegExpObjectImp* regExpObj = static_cast<RegExpObjectImp*>(exec->lexicalInterpreter()->builtinRegExp()); |
90 | |
91 | UString input; |
92 | if (args.isEmpty()) |
93 | input = regExpObj->get(exec, exec->propertyNames().input)->toString(exec); |
94 | else |
95 | input = args[0]->toString(exec); |
96 | |
97 | double lastIndex = thisObj->get(exec, exec->propertyNames().lastIndex)->toInteger(exec); |
98 | |
99 | bool globalFlag = thisObj->get(exec, exec->propertyNames().global)->toBoolean(exec); |
100 | if (!globalFlag) |
101 | lastIndex = 0; |
102 | if (lastIndex < 0 || lastIndex > input.size()) { |
103 | thisObj->put(exec, exec->propertyNames().lastIndex, jsNumber(0), DontDelete | DontEnum); |
104 | return jsNull(); |
105 | } |
106 | |
107 | int foundIndex; |
108 | |
109 | RegExpStringContext ctx(input); |
110 | UString match = regExpObj->performMatch(regExp, exec, ctx, input, static_cast<int>(lastIndex), &foundIndex); |
111 | |
112 | if (exec->hadException()) |
113 | return jsUndefined(); |
114 | |
115 | bool didMatch = !match.isNull(); |
116 | |
117 | if (globalFlag) { |
118 | if (didMatch) |
119 | thisObj->put(exec, exec->propertyNames().lastIndex, jsNumber(foundIndex + match.size()), DontDelete | DontEnum); |
120 | else |
121 | thisObj->put(exec, exec->propertyNames().lastIndex, jsNumber(0), DontDelete | DontEnum); |
122 | } |
123 | |
124 | // Test |
125 | if (id == Test) |
126 | return jsBoolean(didMatch); |
127 | |
128 | // Exec |
129 | if (didMatch) { |
130 | return regExpObj->arrayOfMatches(exec, match); |
131 | } else { |
132 | return jsNull(); |
133 | } |
134 | } |
135 | break; |
136 | case ToString: { |
137 | UString result = "/" + thisObj->get(exec, exec->propertyNames().source)->toString(exec) + "/" ; |
138 | if (thisObj->get(exec, exec->propertyNames().global)->toBoolean(exec)) { |
139 | result += "g" ; |
140 | } |
141 | if (thisObj->get(exec, exec->propertyNames().ignoreCase)->toBoolean(exec)) { |
142 | result += "i" ; |
143 | } |
144 | if (thisObj->get(exec, exec->propertyNames().multiline)->toBoolean(exec)) { |
145 | result += "m" ; |
146 | } |
147 | return jsString(result); |
148 | } |
149 | case Compile: { // JS1.2 legacy, but still in use in the wild somewhat |
150 | RegExpImp* instance = static_cast<RegExpImp*>(thisObj); |
151 | RegExp* newEngine = RegExpObjectImp::makeEngine(exec, args[0]->toString(exec), args[1]); |
152 | if (!newEngine) |
153 | return exec->exception(); |
154 | instance->setRegExp(exec, newEngine); |
155 | return instance; |
156 | } |
157 | } |
158 | |
159 | return jsUndefined(); |
160 | } |
161 | |
162 | // ------------------------------ RegExpImp ------------------------------------ |
163 | |
164 | const ClassInfo RegExpImp::info = {"RegExp" , 0, 0, 0}; |
165 | |
166 | RegExpImp::RegExpImp(RegExpPrototype *regexpProto) |
167 | : JSObject(regexpProto), reg(0L) |
168 | { |
169 | } |
170 | |
171 | RegExpImp::~RegExpImp() |
172 | { |
173 | delete reg; |
174 | } |
175 | |
176 | void RegExpImp::setRegExp(ExecState* exec, RegExp* r) |
177 | { |
178 | delete reg; |
179 | reg = r; |
180 | |
181 | putDirect(exec->propertyNames().global, jsBoolean(r->flags() & RegExp::Global), DontDelete | ReadOnly | DontEnum); |
182 | putDirect(exec->propertyNames().ignoreCase, jsBoolean(r->flags() & RegExp::IgnoreCase), DontDelete | ReadOnly | DontEnum); |
183 | putDirect(exec->propertyNames().multiline, jsBoolean(r->flags() & RegExp::Multiline), DontDelete | ReadOnly | DontEnum); |
184 | |
185 | putDirect(exec->propertyNames().source, jsString(r->pattern()), DontDelete | ReadOnly | DontEnum); |
186 | putDirect(exec->propertyNames().lastIndex, jsNumber(0), DontDelete | DontEnum); |
187 | } |
188 | |
189 | JSObject* RegExpImp::valueClone(Interpreter* targetCtx) const |
190 | { |
191 | RegExpImp* copy = new RegExpImp(static_cast<RegExpPrototype*>(targetCtx->builtinRegExpPrototype())); |
192 | copy->setRegExp(targetCtx->globalExec(), new RegExp(reg->pattern(), reg->flags())); |
193 | return copy; |
194 | } |
195 | |
196 | // ------------------------------ RegExpObjectImp ------------------------------ |
197 | |
198 | const ClassInfo RegExpObjectImp::info = {"Function" , &InternalFunctionImp::info, &RegExpTable, 0}; |
199 | |
200 | /* Source for regexp_object.lut.h |
201 | @begin RegExpTable 20 |
202 | input RegExpObjectImp::Input None |
203 | $_ RegExpObjectImp::Input DontEnum |
204 | multiline RegExpObjectImp::Multiline None |
205 | $* RegExpObjectImp::Multiline DontEnum |
206 | lastMatch RegExpObjectImp::LastMatch DontDelete|ReadOnly |
207 | $& RegExpObjectImp::LastMatch DontDelete|ReadOnly|DontEnum |
208 | lastParen RegExpObjectImp::LastParen DontDelete|ReadOnly |
209 | $+ RegExpObjectImp::LastParen DontDelete|ReadOnly|DontEnum |
210 | leftContext RegExpObjectImp::LeftContext DontDelete|ReadOnly |
211 | $` RegExpObjectImp::LeftContext DontDelete|ReadOnly|DontEnum |
212 | rightContext RegExpObjectImp::RightContext DontDelete|ReadOnly |
213 | $' RegExpObjectImp::RightContext DontDelete|ReadOnly|DontEnum |
214 | $1 RegExpObjectImp::Dollar1 DontDelete|ReadOnly |
215 | $2 RegExpObjectImp::Dollar2 DontDelete|ReadOnly |
216 | $3 RegExpObjectImp::Dollar3 DontDelete|ReadOnly |
217 | $4 RegExpObjectImp::Dollar4 DontDelete|ReadOnly |
218 | $5 RegExpObjectImp::Dollar5 DontDelete|ReadOnly |
219 | $6 RegExpObjectImp::Dollar6 DontDelete|ReadOnly |
220 | $7 RegExpObjectImp::Dollar7 DontDelete|ReadOnly |
221 | $8 RegExpObjectImp::Dollar8 DontDelete|ReadOnly |
222 | $9 RegExpObjectImp::Dollar9 DontDelete|ReadOnly |
223 | @end |
224 | */ |
225 | |
226 | struct KJS::RegExpObjectImpPrivate { |
227 | // Global search cache / settings |
228 | RegExpObjectImpPrivate() : lastInput("" ), lastNumSubPatterns(0), multiline(false) { } |
229 | UString lastInput; |
230 | OwnArrayPtr<int> lastOvector; |
231 | unsigned lastNumSubPatterns : 31; |
232 | bool multiline : 1; |
233 | }; |
234 | |
235 | RegExpObjectImp::RegExpObjectImp(ExecState* exec, |
236 | FunctionPrototype *funcProto, |
237 | RegExpPrototype *regProto) |
238 | |
239 | : InternalFunctionImp(funcProto), |
240 | d(new RegExpObjectImpPrivate) |
241 | { |
242 | // ECMA 15.10.5.1 RegExp.prototype |
243 | putDirect(exec->propertyNames().prototype, regProto, DontEnum | DontDelete | ReadOnly); |
244 | |
245 | // no. of arguments for constructor |
246 | putDirect(exec->propertyNames().length, jsNumber(2), ReadOnly | DontDelete | DontEnum); |
247 | } |
248 | |
249 | void RegExpObjectImp::throwRegExpError(ExecState* exec) |
250 | { |
251 | throwError(exec, RangeError, "Resource exhaustion trying to perform regexp match." ); |
252 | } |
253 | |
/*
To facilitate result caching, exec(), test(), match(), search(), and replace() dispatch regular
expression matching through the performMatch function. We use cached results to calculate,
e.g., RegExp.lastMatch and RegExp.lastParen.
*/
259 | UString RegExpObjectImp::performMatch(RegExp* r, ExecState* exec, const RegExpStringContext& c, |
260 | const UString& s, |
261 | int startOffset, int *endOffset, int **ovector) |
262 | { |
263 | int tmpOffset; |
264 | int *tmpOvector; |
265 | bool error = false; |
266 | UString match = r->match(c, s, &error, startOffset, &tmpOffset, &tmpOvector); |
267 | if (error) { |
268 | if (endOffset) |
269 | *endOffset = -1; |
270 | throwRegExpError(exec); |
271 | return match; |
272 | } |
273 | |
274 | if (endOffset) |
275 | *endOffset = tmpOffset; |
276 | if (ovector) |
277 | *ovector = tmpOvector; |
278 | |
279 | if (!match.isNull()) { |
280 | ASSERT(tmpOvector); |
281 | |
282 | d->lastInput = s; |
283 | d->lastOvector.set(tmpOvector); |
284 | d->lastNumSubPatterns = r->subPatterns(); |
285 | } |
286 | |
287 | return match; |
288 | } |
289 | |
290 | JSObject *RegExpObjectImp::arrayOfMatches(ExecState *exec, const UString &result) const |
291 | { |
292 | List list; |
293 | // The returned array contains 'result' as first item, followed by the list of matches |
294 | list.append(jsString(result)); |
295 | if ( d->lastOvector ) |
296 | for ( int i = 1 ; i < d->lastNumSubPatterns + 1 ; ++i ) |
297 | { |
298 | int start = d->lastOvector[2*i]; |
299 | if (start == -1) |
300 | list.append(jsUndefined()); |
301 | else { |
302 | UString substring = d->lastInput.substr( start, d->lastOvector[2*i+1] - start ); |
303 | list.append(jsString(substring)); |
304 | } |
305 | } |
306 | JSObject *arr = exec->lexicalInterpreter()->builtinArray()->construct(exec, list); |
307 | arr->put(exec, exec->propertyNames().index, jsNumber(d->lastOvector[0])); |
308 | arr->put(exec, exec->propertyNames().input, jsString(d->lastInput)); |
309 | return arr; |
310 | } |
311 | |
312 | JSValue *RegExpObjectImp::getBackref(int i) const |
313 | { |
314 | if (d->lastOvector && i < int(d->lastNumSubPatterns + 1)) { |
315 | UString substring = d->lastInput.substr(d->lastOvector[2*i], d->lastOvector[2*i+1] - d->lastOvector[2*i] ); |
316 | return jsString(substring); |
317 | } |
318 | |
319 | return jsString("" ); |
320 | } |
321 | |
322 | JSValue *RegExpObjectImp::getLastMatch() const |
323 | { |
324 | if (d->lastOvector) { |
325 | UString substring = d->lastInput.substr(d->lastOvector[0], d->lastOvector[1] - d->lastOvector[0]); |
326 | return jsString(substring); |
327 | } |
328 | |
329 | return jsString("" ); |
330 | } |
331 | |
332 | JSValue *RegExpObjectImp::getLastParen() const |
333 | { |
334 | int i = d->lastNumSubPatterns; |
335 | if (i > 0) { |
336 | ASSERT(d->lastOvector); |
337 | UString substring = d->lastInput.substr(d->lastOvector[2*i], d->lastOvector[2*i+1] - d->lastOvector[2*i]); |
338 | return jsString(substring); |
339 | } |
340 | |
341 | return jsString("" ); |
342 | } |
343 | |
344 | JSValue *RegExpObjectImp::getLeftContext() const |
345 | { |
346 | if (d->lastOvector) { |
347 | UString substring = d->lastInput.substr(0, d->lastOvector[0]); |
348 | return jsString(substring); |
349 | } |
350 | |
351 | return jsString("" ); |
352 | } |
353 | |
354 | JSValue *RegExpObjectImp::getRightContext() const |
355 | { |
356 | if (d->lastOvector) { |
357 | UString s = d->lastInput; |
358 | UString substring = s.substr(d->lastOvector[1], s.size() - d->lastOvector[1]); |
359 | return jsString(substring); |
360 | } |
361 | |
362 | return jsString("" ); |
363 | } |
364 | |
365 | bool RegExpObjectImp::getOwnPropertySlot(ExecState *exec, const Identifier& propertyName, PropertySlot& slot) |
366 | { |
367 | return getStaticValueSlot<RegExpObjectImp, InternalFunctionImp>(exec, &RegExpTable, this, propertyName, slot); |
368 | } |
369 | |
370 | JSValue *RegExpObjectImp::getValueProperty(ExecState*, int token) const |
371 | { |
372 | switch (token) { |
373 | case Dollar1: |
374 | return getBackref(1); |
375 | case Dollar2: |
376 | return getBackref(2); |
377 | case Dollar3: |
378 | return getBackref(3); |
379 | case Dollar4: |
380 | return getBackref(4); |
381 | case Dollar5: |
382 | return getBackref(5); |
383 | case Dollar6: |
384 | return getBackref(6); |
385 | case Dollar7: |
386 | return getBackref(7); |
387 | case Dollar8: |
388 | return getBackref(8); |
389 | case Dollar9: |
390 | return getBackref(9); |
391 | case Input: |
392 | return jsString(d->lastInput); |
393 | case Multiline: |
394 | return jsBoolean(d->multiline); |
395 | case LastMatch: |
396 | return getLastMatch(); |
397 | case LastParen: |
398 | return getLastParen(); |
399 | case LeftContext: |
400 | return getLeftContext(); |
401 | case RightContext: |
402 | return getRightContext(); |
403 | default: |
404 | ASSERT(0); |
405 | } |
406 | |
407 | return jsString("" ); |
408 | } |
409 | |
410 | void RegExpObjectImp::put(ExecState *exec, const Identifier &propertyName, JSValue *value, int attr) |
411 | { |
412 | lookupPut<RegExpObjectImp, InternalFunctionImp>(exec, propertyName, value, attr, &RegExpTable, this); |
413 | } |
414 | |
415 | void RegExpObjectImp::putValueProperty(ExecState *exec, int token, JSValue *value, int /*attr*/) |
416 | { |
417 | switch (token) { |
418 | case Input: |
419 | d->lastInput = value->toString(exec); |
420 | break; |
421 | case Multiline: |
422 | d->multiline = value->toBoolean(exec); |
423 | break; |
424 | default: |
425 | ASSERT(0); |
426 | } |
427 | } |
428 | |
429 | bool RegExpObjectImp::implementsConstruct() const |
430 | { |
431 | return true; |
432 | } |
433 | |
434 | RegExp* RegExpObjectImp::makeEngine(ExecState *exec, const UString &p, JSValue *flagsInput) |
435 | { |
436 | int reflags = RegExp::None; |
437 | |
438 | if (!flagsInput->isUndefined()) { |
439 | const UString flags = flagsInput->toString(exec); |
440 | |
441 | // Check flags |
442 | for (int pos = 0; pos < flags.size(); ++pos) { |
443 | switch (flags[pos].unicode()) { |
444 | case 'g': |
445 | if (reflags & RegExp::Global) { |
446 | throwError(exec, SyntaxError, |
447 | "Regular expression flag 'g' given twice" , 1, -1, "<regexp>" ); |
448 | return 0; |
449 | } |
450 | reflags |= RegExp::Global; |
451 | break; |
452 | case 'i': |
453 | if (reflags & RegExp::IgnoreCase) { |
454 | throwError(exec, SyntaxError, |
455 | "Regular expression flag 'i' given twice" , 1, -1, "<regexp>" ); |
456 | return 0; |
457 | } |
458 | reflags |= RegExp::IgnoreCase; |
459 | break; |
460 | case 'm': |
461 | if (reflags & RegExp::Multiline) { |
462 | throwError(exec, SyntaxError, |
463 | "Regular expression flag 'm' given twice" , 1, -1, "<regexp>" ); |
464 | return 0; |
465 | } |
466 | reflags |= RegExp::Multiline; |
467 | break; |
468 | default: { |
469 | throwError(exec, SyntaxError, |
470 | "Invalid regular expression flags" , 1, -1, "<regexp>" ); |
471 | return 0; |
472 | } |
473 | } |
474 | } |
475 | } |
476 | |
477 | RegExp *re = new RegExp(p, reflags); |
478 | if (!re->isValid()) { |
479 | throwError(exec, SyntaxError, |
480 | "Invalid regular expression" , 1, -1, "<regexp>" ); |
481 | delete re; |
482 | return 0; |
483 | } |
484 | return re; |
485 | } |
486 | |
487 | |
488 | // ECMA 15.10.4 |
489 | JSObject *RegExpObjectImp::construct(ExecState *exec, const List &args) |
490 | { |
491 | JSObject *o = args[0]->getObject(); |
492 | if (o && o->inherits(&RegExpImp::info)) { |
493 | if (!args[1]->isUndefined()) |
494 | return throwError(exec, TypeError); |
495 | return o; |
496 | } |
497 | |
498 | UString p = args[0]->isUndefined() ? UString("" ) : args[0]->toString(exec); |
499 | |
500 | RegExp* re = makeEngine(exec, p, args[1]); |
501 | if (!re) |
502 | return exec->exception()->toObject(exec); |
503 | |
504 | |
505 | RegExpPrototype *proto = static_cast<RegExpPrototype*>(exec->lexicalInterpreter()->builtinRegExpPrototype()); |
506 | RegExpImp *dat = new RegExpImp(proto); |
507 | |
508 | dat->setRegExp(exec, re); |
509 | |
510 | return dat; |
511 | } |
512 | |
513 | // ECMA 15.10.3 |
514 | JSValue *RegExpObjectImp::callAsFunction(ExecState *exec, JSObject * /*thisObj*/, const List &args) |
515 | { |
516 | // The RegExp argument case is handled by construct() |
517 | |
518 | return construct(exec, args); |
519 | } |
520 | |