1// -*- c-basic-offset: 2 -*-
2// krazy:excludeall=doublequote_chars (UStrings aren't QStrings)
3/*
4 * This file is part of the KDE libraries
5 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
6 * Copyright (C) 2003 Apple Computer, Inc.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 *
22 */
23
24#include "regexp_object.h"
25#include <config-kjs.h>
26
27#include "regexp_object.lut.h"
28
29#include <stdio.h>
30#include "value.h"
31#include "object.h"
32#include "types.h"
33#include "nodes.h"
34#include "interpreter.h"
35#include "operations.h"
36#include "internal.h"
37#include "regexp.h"
38#include "error_object.h"
39#include "lookup.h"
40
41using namespace KJS;
42
43// ------------------------------ RegExpPrototype ---------------------------
44
45// ECMA 15.10.5
46
47const ClassInfo RegExpPrototype::info = {"RegExp", 0, 0, 0};
48
49RegExpPrototype::RegExpPrototype(ExecState *exec,
50 ObjectPrototype *objProto,
51 FunctionPrototype *funcProto)
52 : JSObject(objProto)
53{
54 static const Identifier* execPropertyName = new Identifier("exec");
55 static const Identifier* testPropertyName = new Identifier("test");
56 static const Identifier* compilePropertyName = new Identifier("compile");
57
58 putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::Exec, 0, *execPropertyName), DontEnum);
59 putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::Test, 0, *testPropertyName), DontEnum);
60 putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::ToString, 0, exec->propertyNames().toString), DontEnum);
61 putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::Compile, 1, *compilePropertyName), DontEnum);
62}
63
64// ------------------------------ RegExpProtoFunc ---------------------------
65
66RegExpProtoFunc::RegExpProtoFunc(ExecState* exec, FunctionPrototype* funcProto, int i, int len, const Identifier& name)
67 : InternalFunctionImp(funcProto, name), id(i)
68{
69 putDirect(exec->propertyNames().length, len, DontDelete | ReadOnly | DontEnum);
70}
71
72JSValue *RegExpProtoFunc::callAsFunction(ExecState *exec, JSObject *thisObj, const List &args)
73{
74 if (!thisObj->inherits(&RegExpImp::info)) {
75 if (thisObj->inherits(&RegExpPrototype::info)) {
76 switch (id) {
77 case ToString: return jsString("//");
78 }
79 }
80
81 return throwError(exec, TypeError);
82 }
83
84 switch (id) {
85 case Test: // 15.10.6.2
86 case Exec:
87 {
88 RegExp *regExp = static_cast<RegExpImp*>(thisObj)->regExp();
89 RegExpObjectImp* regExpObj = static_cast<RegExpObjectImp*>(exec->lexicalInterpreter()->builtinRegExp());
90
91 UString input;
92 if (args.isEmpty())
93 input = regExpObj->get(exec, exec->propertyNames().input)->toString(exec);
94 else
95 input = args[0]->toString(exec);
96
97 double lastIndex = thisObj->get(exec, exec->propertyNames().lastIndex)->toInteger(exec);
98
99 bool globalFlag = thisObj->get(exec, exec->propertyNames().global)->toBoolean(exec);
100 if (!globalFlag)
101 lastIndex = 0;
102 if (lastIndex < 0 || lastIndex > input.size()) {
103 thisObj->put(exec, exec->propertyNames().lastIndex, jsNumber(0), DontDelete | DontEnum);
104 return jsNull();
105 }
106
107 int foundIndex;
108
109 RegExpStringContext ctx(input);
110 UString match = regExpObj->performMatch(regExp, exec, ctx, input, static_cast<int>(lastIndex), &foundIndex);
111
112 if (exec->hadException())
113 return jsUndefined();
114
115 bool didMatch = !match.isNull();
116
117 if (globalFlag) {
118 if (didMatch)
119 thisObj->put(exec, exec->propertyNames().lastIndex, jsNumber(foundIndex + match.size()), DontDelete | DontEnum);
120 else
121 thisObj->put(exec, exec->propertyNames().lastIndex, jsNumber(0), DontDelete | DontEnum);
122 }
123
124 // Test
125 if (id == Test)
126 return jsBoolean(didMatch);
127
128 // Exec
129 if (didMatch) {
130 return regExpObj->arrayOfMatches(exec, match);
131 } else {
132 return jsNull();
133 }
134 }
135 break;
136 case ToString: {
137 UString result = "/" + thisObj->get(exec, exec->propertyNames().source)->toString(exec) + "/";
138 if (thisObj->get(exec, exec->propertyNames().global)->toBoolean(exec)) {
139 result += "g";
140 }
141 if (thisObj->get(exec, exec->propertyNames().ignoreCase)->toBoolean(exec)) {
142 result += "i";
143 }
144 if (thisObj->get(exec, exec->propertyNames().multiline)->toBoolean(exec)) {
145 result += "m";
146 }
147 return jsString(result);
148 }
149 case Compile: { // JS1.2 legacy, but still in use in the wild somewhat
150 RegExpImp* instance = static_cast<RegExpImp*>(thisObj);
151 RegExp* newEngine = RegExpObjectImp::makeEngine(exec, args[0]->toString(exec), args[1]);
152 if (!newEngine)
153 return exec->exception();
154 instance->setRegExp(exec, newEngine);
155 return instance;
156 }
157 }
158
159 return jsUndefined();
160}
161
162// ------------------------------ RegExpImp ------------------------------------
163
164const ClassInfo RegExpImp::info = {"RegExp", 0, 0, 0};
165
166RegExpImp::RegExpImp(RegExpPrototype *regexpProto)
167 : JSObject(regexpProto), reg(0L)
168{
169}
170
171RegExpImp::~RegExpImp()
172{
173 delete reg;
174}
175
176void RegExpImp::setRegExp(ExecState* exec, RegExp* r)
177{
178 delete reg;
179 reg = r;
180
181 putDirect(exec->propertyNames().global, jsBoolean(r->flags() & RegExp::Global), DontDelete | ReadOnly | DontEnum);
182 putDirect(exec->propertyNames().ignoreCase, jsBoolean(r->flags() & RegExp::IgnoreCase), DontDelete | ReadOnly | DontEnum);
183 putDirect(exec->propertyNames().multiline, jsBoolean(r->flags() & RegExp::Multiline), DontDelete | ReadOnly | DontEnum);
184
185 putDirect(exec->propertyNames().source, jsString(r->pattern()), DontDelete | ReadOnly | DontEnum);
186 putDirect(exec->propertyNames().lastIndex, jsNumber(0), DontDelete | DontEnum);
187}
188
189JSObject* RegExpImp::valueClone(Interpreter* targetCtx) const
190{
191 RegExpImp* copy = new RegExpImp(static_cast<RegExpPrototype*>(targetCtx->builtinRegExpPrototype()));
192 copy->setRegExp(targetCtx->globalExec(), new RegExp(reg->pattern(), reg->flags()));
193 return copy;
194}
195
196// ------------------------------ RegExpObjectImp ------------------------------
197
198const ClassInfo RegExpObjectImp::info = {"Function", &InternalFunctionImp::info, &RegExpTable, 0};
199
200/* Source for regexp_object.lut.h
201@begin RegExpTable 20
202 input RegExpObjectImp::Input None
203 $_ RegExpObjectImp::Input DontEnum
204 multiline RegExpObjectImp::Multiline None
205 $* RegExpObjectImp::Multiline DontEnum
206 lastMatch RegExpObjectImp::LastMatch DontDelete|ReadOnly
207 $& RegExpObjectImp::LastMatch DontDelete|ReadOnly|DontEnum
208 lastParen RegExpObjectImp::LastParen DontDelete|ReadOnly
209 $+ RegExpObjectImp::LastParen DontDelete|ReadOnly|DontEnum
210 leftContext RegExpObjectImp::LeftContext DontDelete|ReadOnly
211 $` RegExpObjectImp::LeftContext DontDelete|ReadOnly|DontEnum
212 rightContext RegExpObjectImp::RightContext DontDelete|ReadOnly
213 $' RegExpObjectImp::RightContext DontDelete|ReadOnly|DontEnum
214 $1 RegExpObjectImp::Dollar1 DontDelete|ReadOnly
215 $2 RegExpObjectImp::Dollar2 DontDelete|ReadOnly
216 $3 RegExpObjectImp::Dollar3 DontDelete|ReadOnly
217 $4 RegExpObjectImp::Dollar4 DontDelete|ReadOnly
218 $5 RegExpObjectImp::Dollar5 DontDelete|ReadOnly
219 $6 RegExpObjectImp::Dollar6 DontDelete|ReadOnly
220 $7 RegExpObjectImp::Dollar7 DontDelete|ReadOnly
221 $8 RegExpObjectImp::Dollar8 DontDelete|ReadOnly
222 $9 RegExpObjectImp::Dollar9 DontDelete|ReadOnly
223@end
224*/
225
226struct KJS::RegExpObjectImpPrivate {
227 // Global search cache / settings
228 RegExpObjectImpPrivate() : lastInput(""), lastNumSubPatterns(0), multiline(false) { }
229 UString lastInput;
230 OwnArrayPtr<int> lastOvector;
231 unsigned lastNumSubPatterns : 31;
232 bool multiline : 1;
233};
234
235RegExpObjectImp::RegExpObjectImp(ExecState* exec,
236 FunctionPrototype *funcProto,
237 RegExpPrototype *regProto)
238
239 : InternalFunctionImp(funcProto),
240 d(new RegExpObjectImpPrivate)
241{
242 // ECMA 15.10.5.1 RegExp.prototype
243 putDirect(exec->propertyNames().prototype, regProto, DontEnum | DontDelete | ReadOnly);
244
245 // no. of arguments for constructor
246 putDirect(exec->propertyNames().length, jsNumber(2), ReadOnly | DontDelete | DontEnum);
247}
248
249void RegExpObjectImp::throwRegExpError(ExecState* exec)
250{
251 throwError(exec, RangeError, "Resource exhaustion trying to perform regexp match.");
252}
253
/*
  To facilitate result caching, exec(), test(), match(), search(), and replace() dispatch regular
  expression matching through the performMatch function. We use cached results to calculate,
  e.g., RegExp.lastMatch and RegExp.lastParen.
*/
259UString RegExpObjectImp::performMatch(RegExp* r, ExecState* exec, const RegExpStringContext& c,
260 const UString& s,
261 int startOffset, int *endOffset, int **ovector)
262{
263 int tmpOffset;
264 int *tmpOvector;
265 bool error = false;
266 UString match = r->match(c, s, &error, startOffset, &tmpOffset, &tmpOvector);
267 if (error) {
268 if (endOffset)
269 *endOffset = -1;
270 throwRegExpError(exec);
271 return match;
272 }
273
274 if (endOffset)
275 *endOffset = tmpOffset;
276 if (ovector)
277 *ovector = tmpOvector;
278
279 if (!match.isNull()) {
280 ASSERT(tmpOvector);
281
282 d->lastInput = s;
283 d->lastOvector.set(tmpOvector);
284 d->lastNumSubPatterns = r->subPatterns();
285 }
286
287 return match;
288}
289
290JSObject *RegExpObjectImp::arrayOfMatches(ExecState *exec, const UString &result) const
291{
292 List list;
293 // The returned array contains 'result' as first item, followed by the list of matches
294 list.append(jsString(result));
295 if ( d->lastOvector )
296 for ( int i = 1 ; i < d->lastNumSubPatterns + 1 ; ++i )
297 {
298 int start = d->lastOvector[2*i];
299 if (start == -1)
300 list.append(jsUndefined());
301 else {
302 UString substring = d->lastInput.substr( start, d->lastOvector[2*i+1] - start );
303 list.append(jsString(substring));
304 }
305 }
306 JSObject *arr = exec->lexicalInterpreter()->builtinArray()->construct(exec, list);
307 arr->put(exec, exec->propertyNames().index, jsNumber(d->lastOvector[0]));
308 arr->put(exec, exec->propertyNames().input, jsString(d->lastInput));
309 return arr;
310}
311
312JSValue *RegExpObjectImp::getBackref(int i) const
313{
314 if (d->lastOvector && i < int(d->lastNumSubPatterns + 1)) {
315 UString substring = d->lastInput.substr(d->lastOvector[2*i], d->lastOvector[2*i+1] - d->lastOvector[2*i] );
316 return jsString(substring);
317 }
318
319 return jsString("");
320}
321
322JSValue *RegExpObjectImp::getLastMatch() const
323{
324 if (d->lastOvector) {
325 UString substring = d->lastInput.substr(d->lastOvector[0], d->lastOvector[1] - d->lastOvector[0]);
326 return jsString(substring);
327 }
328
329 return jsString("");
330}
331
332JSValue *RegExpObjectImp::getLastParen() const
333{
334 int i = d->lastNumSubPatterns;
335 if (i > 0) {
336 ASSERT(d->lastOvector);
337 UString substring = d->lastInput.substr(d->lastOvector[2*i], d->lastOvector[2*i+1] - d->lastOvector[2*i]);
338 return jsString(substring);
339 }
340
341 return jsString("");
342}
343
344JSValue *RegExpObjectImp::getLeftContext() const
345{
346 if (d->lastOvector) {
347 UString substring = d->lastInput.substr(0, d->lastOvector[0]);
348 return jsString(substring);
349 }
350
351 return jsString("");
352}
353
354JSValue *RegExpObjectImp::getRightContext() const
355{
356 if (d->lastOvector) {
357 UString s = d->lastInput;
358 UString substring = s.substr(d->lastOvector[1], s.size() - d->lastOvector[1]);
359 return jsString(substring);
360 }
361
362 return jsString("");
363}
364
365bool RegExpObjectImp::getOwnPropertySlot(ExecState *exec, const Identifier& propertyName, PropertySlot& slot)
366{
367 return getStaticValueSlot<RegExpObjectImp, InternalFunctionImp>(exec, &RegExpTable, this, propertyName, slot);
368}
369
370JSValue *RegExpObjectImp::getValueProperty(ExecState*, int token) const
371{
372 switch (token) {
373 case Dollar1:
374 return getBackref(1);
375 case Dollar2:
376 return getBackref(2);
377 case Dollar3:
378 return getBackref(3);
379 case Dollar4:
380 return getBackref(4);
381 case Dollar5:
382 return getBackref(5);
383 case Dollar6:
384 return getBackref(6);
385 case Dollar7:
386 return getBackref(7);
387 case Dollar8:
388 return getBackref(8);
389 case Dollar9:
390 return getBackref(9);
391 case Input:
392 return jsString(d->lastInput);
393 case Multiline:
394 return jsBoolean(d->multiline);
395 case LastMatch:
396 return getLastMatch();
397 case LastParen:
398 return getLastParen();
399 case LeftContext:
400 return getLeftContext();
401 case RightContext:
402 return getRightContext();
403 default:
404 ASSERT(0);
405 }
406
407 return jsString("");
408}
409
410void RegExpObjectImp::put(ExecState *exec, const Identifier &propertyName, JSValue *value, int attr)
411{
412 lookupPut<RegExpObjectImp, InternalFunctionImp>(exec, propertyName, value, attr, &RegExpTable, this);
413}
414
415void RegExpObjectImp::putValueProperty(ExecState *exec, int token, JSValue *value, int /*attr*/)
416{
417 switch (token) {
418 case Input:
419 d->lastInput = value->toString(exec);
420 break;
421 case Multiline:
422 d->multiline = value->toBoolean(exec);
423 break;
424 default:
425 ASSERT(0);
426 }
427}
428
429bool RegExpObjectImp::implementsConstruct() const
430{
431 return true;
432}
433
434RegExp* RegExpObjectImp::makeEngine(ExecState *exec, const UString &p, JSValue *flagsInput)
435{
436 int reflags = RegExp::None;
437
438 if (!flagsInput->isUndefined()) {
439 const UString flags = flagsInput->toString(exec);
440
441 // Check flags
442 for (int pos = 0; pos < flags.size(); ++pos) {
443 switch (flags[pos].unicode()) {
444 case 'g':
445 if (reflags & RegExp::Global) {
446 throwError(exec, SyntaxError,
447 "Regular expression flag 'g' given twice", 1, -1, "<regexp>");
448 return 0;
449 }
450 reflags |= RegExp::Global;
451 break;
452 case 'i':
453 if (reflags & RegExp::IgnoreCase) {
454 throwError(exec, SyntaxError,
455 "Regular expression flag 'i' given twice", 1, -1, "<regexp>");
456 return 0;
457 }
458 reflags |= RegExp::IgnoreCase;
459 break;
460 case 'm':
461 if (reflags & RegExp::Multiline) {
462 throwError(exec, SyntaxError,
463 "Regular expression flag 'm' given twice", 1, -1, "<regexp>");
464 return 0;
465 }
466 reflags |= RegExp::Multiline;
467 break;
468 default: {
469 throwError(exec, SyntaxError,
470 "Invalid regular expression flags", 1, -1, "<regexp>");
471 return 0;
472 }
473 }
474 }
475 }
476
477 RegExp *re = new RegExp(p, reflags);
478 if (!re->isValid()) {
479 throwError(exec, SyntaxError,
480 "Invalid regular expression", 1, -1, "<regexp>");
481 delete re;
482 return 0;
483 }
484 return re;
485}
486
487
488// ECMA 15.10.4
489JSObject *RegExpObjectImp::construct(ExecState *exec, const List &args)
490{
491 JSObject *o = args[0]->getObject();
492 if (o && o->inherits(&RegExpImp::info)) {
493 if (!args[1]->isUndefined())
494 return throwError(exec, TypeError);
495 return o;
496 }
497
498 UString p = args[0]->isUndefined() ? UString("") : args[0]->toString(exec);
499
500 RegExp* re = makeEngine(exec, p, args[1]);
501 if (!re)
502 return exec->exception()->toObject(exec);
503
504
505 RegExpPrototype *proto = static_cast<RegExpPrototype*>(exec->lexicalInterpreter()->builtinRegExpPrototype());
506 RegExpImp *dat = new RegExpImp(proto);
507
508 dat->setRegExp(exec, re);
509
510 return dat;
511}
512
513// ECMA 15.10.3
514JSValue *RegExpObjectImp::callAsFunction(ExecState *exec, JSObject * /*thisObj*/, const List &args)
515{
516 // The RegExp argument case is handled by construct()
517
518 return construct(exec, args);
519}
520