2008-09-09 20:08:45 +00:00
|
|
|
// Copyright 2006-2008 the V8 project authors. All rights reserved.
|
2008-07-03 15:10:15 +00:00
|
|
|
// Redistribution and use in source and binary forms, with or without
|
|
|
|
// modification, are permitted provided that the following conditions are
|
|
|
|
// met:
|
|
|
|
//
|
|
|
|
// * Redistributions of source code must retain the above copyright
|
|
|
|
// notice, this list of conditions and the following disclaimer.
|
|
|
|
// * Redistributions in binary form must reproduce the above
|
|
|
|
// copyright notice, this list of conditions and the following
|
|
|
|
// disclaimer in the documentation and/or other materials provided
|
|
|
|
// with the distribution.
|
|
|
|
// * Neither the name of Google Inc. nor the names of its
|
|
|
|
// contributors may be used to endorse or promote products derived
|
|
|
|
// from this software without specific prior written permission.
|
|
|
|
//
|
|
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
|
|
|
|
#include "v8.h"
|
|
|
|
|
|
|
|
#include "execution.h"
|
|
|
|
#include "factory.h"
|
|
|
|
#include "jsregexp.h"
|
|
|
|
#include "platform.h"
|
2008-10-02 15:35:28 +00:00
|
|
|
#include "runtime.h"
|
2008-07-03 15:10:15 +00:00
|
|
|
#include "top.h"
|
2008-10-24 08:40:02 +00:00
|
|
|
#include "compilation-cache.h"
|
2008-07-03 15:10:15 +00:00
|
|
|
|
2008-10-27 09:48:47 +00:00
|
|
|
// Including pcre.h undefines DEBUG to avoid getting debug output from
|
|
|
|
// the JSCRE implementation. Make sure to redefine it in debug mode
|
|
|
|
// after having included the header file.
|
|
|
|
#ifdef DEBUG
|
|
|
|
#include "third_party/jscre/pcre.h"
|
|
|
|
#define DEBUG
|
|
|
|
#else
|
|
|
|
#include "third_party/jscre/pcre.h"
|
|
|
|
#endif
|
|
|
|
|
2008-07-03 15:10:15 +00:00
|
|
|
namespace v8 { namespace internal {
|
|
|
|
|
|
|
|
|
|
|
|
// Slot in the JSCRE data FixedArray that holds the number of captures as a
// Smi (written by JsreCompile, read by JsreCapture).
#define CAPTURE_INDEX 0
|
|
|
|
// Slot in the JSCRE data FixedArray that holds the ByteArray wrapping the
// compiled JSCRE program (written by JsreCompile, read by JsreInternal).
#define INTERNAL_INDEX 1
|
|
|
|
|
|
|
|
// Failure recorded by JSREMalloc when a ByteArray allocation fails. DoCompile
// resets it before calling jsRegExpCompile and inspects it afterwards to
// decide whether to report an allocation failure to its caller.
static Failure* malloc_failure;
|
|
|
|
|
|
|
|
static void* JSREMalloc(size_t size) {
|
|
|
|
Object* obj = Heap::AllocateByteArray(size);
|
|
|
|
|
|
|
|
// If allocation failed, return a NULL pointer to JSRE, and jsRegExpCompile
|
|
|
|
// will return NULL to the caller, performs GC there.
|
|
|
|
// Also pass failure information to the caller.
|
|
|
|
if (obj->IsFailure()) {
|
|
|
|
malloc_failure = Failure::cast(obj);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Note: object is unrooted, the caller of jsRegExpCompile must
|
|
|
|
// create a handle for the return value before doing heap allocation.
|
|
|
|
return reinterpret_cast<void*>(ByteArray::cast(obj)->GetDataStartAddress());
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Deallocation callback handed to jsRegExpCompile. Deliberately does nothing:
// the memory handed out by JSREMalloc is garbage collected.
static void JSREFree(void* p) {
  USE(p);
}
|
|
|
|
|
|
|
|
|
2008-10-09 12:18:48 +00:00
|
|
|
// Subject string most recently converted by CachedStringToTwoByte. Reset to
// NULL by the collection prologues when the cache is invalidated.
String* RegExpImpl::last_ascii_string_ = NULL;
|
|
|
|
// Two-byte conversion of last_ascii_string_; returned by
// CachedStringToTwoByte when the same subject is converted again.
String* RegExpImpl::two_byte_cached_string_ = NULL;
|
|
|
|
|
|
|
|
|
|
|
|
void RegExpImpl::NewSpaceCollectionPrologue() {
|
|
|
|
// The two byte string is always in the old space. The Ascii string may be
|
|
|
|
// in either place. If it is in the old space we don't need to do anything.
|
|
|
|
if (Heap::InNewSpace(last_ascii_string_)) {
|
|
|
|
// Invalidate the cache.
|
|
|
|
last_ascii_string_ = NULL;
|
|
|
|
two_byte_cached_string_ = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Called before an old-space collection: unconditionally drops the string
// conversion cache.
void RegExpImpl::OldSpaceCollectionPrologue() {
  two_byte_cached_string_ = NULL;
  last_ascii_string_ = NULL;
}
|
|
|
|
|
|
|
|
|
2008-08-22 13:33:59 +00:00
|
|
|
// Creates a regexp object by invoking |constructor| with the arguments
// (pattern, flags). On failure to load the constructor,
// *has_pending_exception is set and a handle to Failure::Exception() is
// returned.
Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor,
                                               Handle<String> pattern,
                                               Handle<String> flags,
                                               bool* has_pending_exception) {
  // Make sure the constructor function has been loaded before calling it.
  if (!constructor->IsLoaded()) {
    LoadLazy(constructor, has_pending_exception);
    if (*has_pending_exception) {
      return Handle<Object>(Failure::Exception());
    }
  }

  // Invoke the construct code with the two arguments.
  Handle<Object> pattern_arg = Handle<Object>::cast(pattern);
  Handle<Object> flags_arg = Handle<Object>::cast(flags);
  Object** argv[2] = { pattern_arg.location(), flags_arg.location() };
  return Execution::New(constructor, 2, argv, has_pending_exception);
}
|
|
|
|
|
|
|
|
|
2008-10-09 12:18:48 +00:00
|
|
|
// Converts a source string to a 16 bit flat string or a SlicedString containing
|
|
|
|
// a 16 bit flat string).
|
|
|
|
Handle<String> RegExpImpl::CachedStringToTwoByte(Handle<String> subject) {
|
|
|
|
if (*subject == last_ascii_string_) {
|
|
|
|
ASSERT(two_byte_cached_string_ != NULL);
|
|
|
|
return Handle<String>(String::cast(two_byte_cached_string_));
|
|
|
|
}
|
|
|
|
Handle<String> two_byte_string = StringToTwoByte(subject);
|
|
|
|
last_ascii_string_ = *subject;
|
|
|
|
two_byte_cached_string_ = *two_byte_string;
|
|
|
|
return two_byte_string;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Converts a source string to a 16 bit flat string or a SlicedString containing
|
|
|
|
// a 16 bit flat string).
|
|
|
|
Handle<String> RegExpImpl::StringToTwoByte(Handle<String> pattern) {
|
2008-11-03 10:16:05 +00:00
|
|
|
StringShape shape(*pattern);
|
|
|
|
if (!pattern->IsFlat(shape)) {
|
2008-10-09 12:18:48 +00:00
|
|
|
FlattenString(pattern);
|
|
|
|
}
|
2008-11-03 10:16:05 +00:00
|
|
|
Handle<String> flat_string(shape.IsCons() ?
|
2008-10-09 12:18:48 +00:00
|
|
|
String::cast(ConsString::cast(*pattern)->first()) :
|
|
|
|
*pattern);
|
2008-11-03 10:16:05 +00:00
|
|
|
ASSERT(flat_string->IsString());
|
|
|
|
StringShape flat_shape(*flat_string);
|
|
|
|
ASSERT(!flat_shape.IsCons());
|
|
|
|
ASSERT(flat_shape.IsSequential() ||
|
|
|
|
flat_shape.IsSliced() ||
|
|
|
|
flat_shape.IsExternal());
|
|
|
|
if (!flat_shape.IsAsciiRepresentation()) {
|
2008-10-09 12:18:48 +00:00
|
|
|
return flat_string;
|
|
|
|
}
|
|
|
|
|
2008-11-03 10:16:05 +00:00
|
|
|
int len = flat_string->length(flat_shape);
|
2008-10-09 12:18:48 +00:00
|
|
|
Handle<String> two_byte_string =
|
2008-11-03 10:16:05 +00:00
|
|
|
Factory::NewRawTwoByteString(len, TENURED);
|
|
|
|
uc16* dest = SeqTwoByteString::cast(*two_byte_string)->GetChars();
|
|
|
|
String::WriteToFlat(*flat_string, flat_shape, dest, 0, len);
|
2008-10-09 12:18:48 +00:00
|
|
|
return two_byte_string;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-10-24 08:40:02 +00:00
|
|
|
// Translates a flag string into JSRegExp flags: 'i' sets IGNORE_CASE, 'g'
// sets GLOBAL and 'm' sets MULTILINE. Any other character is ignored.
static JSRegExp::Flags RegExpFlagsFromString(Handle<String> str) {
  int bits = JSRegExp::NONE;
  StringShape shape(*str);
  int pos = 0;
  while (pos < str->length(shape)) {
    switch (str->Get(shape, pos)) {
      case 'g':
        bits |= JSRegExp::GLOBAL;
        break;
      case 'i':
        bits |= JSRegExp::IGNORE_CASE;
        break;
      case 'm':
        bits |= JSRegExp::MULTILINE;
        break;
    }
    pos++;
  }
  return JSRegExp::Flags(bits);
}
|
|
|
|
|
|
|
|
|
2008-10-02 15:35:28 +00:00
|
|
|
// Predicate used by RegExpImpl::Compile: a pattern containing any character
// for which this returns true is not compiled as a plain atom.
unibrow::Predicate<unibrow::RegExpSpecialChar, 128> is_reg_exp_special_char;
|
|
|
|
|
|
|
|
|
|
|
|
// Compiles |pattern| with the flags in |flag_str| and stores the resulting
// data on |re|. Compilation results are shared through the CompilationCache.
// Patterns without special characters (and without the ignore-case flag) are
// compiled as atoms; everything else goes through JSCRE. Returns the result
// of the chosen compile routine, or |re| on a cache hit.
Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
                                   Handle<String> pattern,
                                   Handle<String> flag_str) {
  JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);
  Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags);
  bool in_cache = !cached.is_null();
  Handle<Object> result;
  StringShape shape(*pattern);

  if (!in_cache) {
    // The pattern is an atom only if case matters and no character in it is
    // a regexp special character.
    bool plain_atom = !flags.is_ignore_case();
    for (int pos = 0; plain_atom && pos < pattern->length(shape); pos++) {
      plain_atom = !is_reg_exp_special_char.get(pattern->Get(shape, pos));
    }

    result = plain_atom
        ? AtomCompile(re, pattern, flags)
        : JsreCompile(re, pattern, flags);

    // If compilation succeeded then the data is set on the regexp and we can
    // store it in the cache.
    if (re->data()->IsFixedArray()) {
      Handle<FixedArray> compiled(FixedArray::cast(re->data()));
      CompilationCache::PutRegExp(pattern, flags, compiled);
    }
  } else {
    // Reuse the previously compiled data.
    re->set_data(*cached);
    result = re;
  }

  LOG(RegExpCompileEvent(re, in_cache));
  return result;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Runs a single match of |regexp| against |subject| starting at |index|,
// dispatching on how the regexp was compiled.
Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
                                Handle<String> subject,
                                Handle<Object> index) {
  switch (regexp->TypeTag()) {
    case JSRegExp::ATOM:
      return AtomExec(regexp, subject, index);
    case JSRegExp::JSCRE:
      return JsreExec(regexp, subject, index);
    default:
      break;
  }
  // No other type tags are expected.
  UNREACHABLE();
  return Handle<Object>();
}
|
|
|
|
|
|
|
|
|
|
|
|
// Collects all matches of |regexp| in |subject|, dispatching on how the
// regexp was compiled.
Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
                                      Handle<String> subject) {
  switch (regexp->TypeTag()) {
    case JSRegExp::ATOM:
      return AtomExecGlobal(regexp, subject);
    case JSRegExp::JSCRE:
      return JsreExecGlobal(regexp, subject);
    default:
      break;
  }
  // No other type tags are expected.
  UNREACHABLE();
  return Handle<Object>();
}
|
|
|
|
|
|
|
|
|
|
|
|
// "Compiles" an atom regexp: the pattern string itself is stored as the
// match data of |re|. Returns |re|.
Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re,
                                       Handle<String> pattern,
                                       JSRegExp::Flags flags) {
  Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, pattern);
  return re;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Matches an atom regexp by plain substring search starting at |index|.
// Returns a JSArray [start, end] on success, null if the atom does not
// occur, and the Smi -1 if |index| cannot be converted to an array index.
Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
                                    Handle<String> subject,
                                    Handle<Object> index) {
  Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));

  uint32_t start_index;
  if (!Array::IndexFromObject(*index, &start_index)) {
    return Handle<Smi>(Smi::FromInt(-1));
  }

  LOG(RegExpExecEvent(re, start_index, subject));
  int match_start = Runtime::StringMatch(subject, needle, start_index);
  if (match_start == -1) {
    return Factory::null_value();
  }

  // Package the (start, end) pair of the match.
  Handle<FixedArray> bounds = Factory::NewFixedArray(2);
  bounds->set(0, Smi::FromInt(match_start), SKIP_WRITE_BARRIER);
  bounds->set(1,
              Smi::FromInt(match_start + needle->length()),
              SKIP_WRITE_BARRIER);
  return Factory::NewJSArrayWithElements(bounds);
}
|
|
|
|
|
|
|
|
|
|
|
|
// Finds every occurrence of an atom regexp in |subject| by repeated
// substring search. Returns a JSArray whose elements are [start, end]
// JSArrays, one per match, in order of occurrence.
Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
                                          Handle<String> subject) {
  Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
  Handle<JSArray> result = Factory::NewJSArray(1);
  int search_from = 0;
  int subject_length = subject->length();
  int needle_length = needle->length();

  for (int match_count = 0; ; match_count++) {
    LOG(RegExpExecEvent(re, search_from, subject));

    // Stop once the needle can no longer fit in the rest of the subject.
    if (search_from + needle_length > subject_length) break;
    int match_start = Runtime::StringMatch(subject, needle, search_from);
    if (match_start == -1) break;

    HandleScope scope;
    int match_end = match_start + needle_length;

    Handle<FixedArray> bounds = Factory::NewFixedArray(2);
    bounds->set(0, Smi::FromInt(match_start), SKIP_WRITE_BARRIER);
    bounds->set(1, Smi::FromInt(match_end), SKIP_WRITE_BARRIER);
    Handle<JSArray> pair = Factory::NewJSArrayWithElements(bounds);
    SetElement(result, match_count, pair);

    // Continue after this match; step one further for an empty needle so
    // the search always advances.
    search_from = (needle_length == 0) ? match_end + 1 : match_end;
  }
  return result;
}
|
|
|
|
|
|
|
|
|
2008-10-30 11:12:08 +00:00
|
|
|
// Runs jsRegExpCompile on a two-byte |pattern|, storing the compiled program
// in *code, the capture count in *number_of_captures and any error text in
// *error_message. Returns the recorded allocation failure if compilation
// produced no code because JSREMalloc failed with a retry-after-GC or
// out-of-memory failure; otherwise returns a non-failure object.
static inline Object* DoCompile(String* pattern,
                                JSRegExp::Flags flags,
                                unsigned* number_of_captures,
                                const char** error_message,
                                JscreRegExp** code) {
  JSRegExpIgnoreCaseOption case_option = JSRegExpDoNotIgnoreCase;
  if (flags.is_ignore_case()) case_option = JSRegExpIgnoreCase;

  JSRegExpMultilineOption multiline_option = JSRegExpSingleLine;
  if (flags.is_multiline()) multiline_option = JSRegExpMultiline;

  *error_message = NULL;
  // Start from a failure that is neither retry-after-GC nor out-of-memory;
  // JSREMalloc overwrites it if an allocation actually fails.
  malloc_failure = Failure::Exception();
  *code = jsRegExpCompile(pattern->GetTwoByteData(),
                          pattern->length(),
                          case_option,
                          multiline_option,
                          number_of_captures,
                          error_message,
                          &JSREMalloc,
                          &JSREFree);

  bool allocation_failed =
      *code == NULL && (malloc_failure->IsRetryAfterGC() ||
                        malloc_failure->IsOutOfMemoryFailure());
  if (allocation_failed) {
    return malloc_failure;
  }

  // It doesn't matter which object is returned here; any non-failure tells
  // the GC-retry code that there was no allocation failure.
  return pattern;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Handle-taking wrapper that runs DoCompile through CALL_HEAP_FUNCTION_VOID,
// so that an allocation failure reported by DoCompile is dealt with by the
// GC-retry code. Results are delivered through the out parameters.
void CompileWithRetryAfterGC(Handle<String> pattern,
                             JSRegExp::Flags flags,
                             unsigned* number_of_captures,
                             const char** error_message,
                             JscreRegExp** code) {
  CALL_HEAP_FUNCTION_VOID(
      DoCompile(*pattern, flags, number_of_captures, error_message, code));
}
|
|
|
|
|
|
|
|
|
|
|
|
// Compiles |pattern| with JSCRE and stores [capture count, compiled program]
// as the data of |re|. Returns |re| on success. If compilation fails, a
// "malformed_regexp" SyntaxError is thrown and an empty handle is returned.
Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re,
                                       Handle<String> pattern,
                                       JSRegExp::Flags flags) {
  Handle<String> two_byte_pattern = StringToTwoByte(pattern);

  unsigned number_of_captures;
  const char* error_message = NULL;
  JscreRegExp* code = NULL;
  FlattenString(pattern);

  CompileWithRetryAfterGC(two_byte_pattern,
                          flags,
                          &number_of_captures,
                          &error_message,
                          &code);

  if (code != NULL) {
    // Convert the returned address back to the ByteArray that owns it and
    // put it in a handle before allocating anything else.
    Handle<ByteArray> internal(
        ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code)));

    Handle<FixedArray> value = Factory::NewFixedArray(2);
    value->set(CAPTURE_INDEX, Smi::FromInt(number_of_captures));
    value->set(INTERNAL_INDEX, *internal);
    Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);
    return re;
  }

  // Compilation failed: throw a SyntaxError carrying the pattern and the
  // error text.
  const char* message =
      (error_message == NULL) ? "Unknown regexp error" : error_message;
  Handle<JSArray> error_args = Factory::NewJSArray(2);
  SetElement(error_args, 0, pattern);
  SetElement(error_args, 1, Factory::NewStringFromUtf8(CStrVector(message)));
  Handle<Object> regexp_err =
      Factory::NewSyntaxError("malformed_regexp", error_args);
  Top::Throw(*regexp_err);
  return Handle<Object>();
}
|
|
|
|
|
|
|
|
|
2008-09-23 11:45:43 +00:00
|
|
|
// Executes the compiled JSCRE program of |regexp| once against
// |two_byte_subject|, starting at |previous_index|. Returns null when there
// is no match (or JSRE's match limit was hit), the result of throwing a
// "jsre_error" TypeError for any other JSRE error, and otherwise a JSArray
// with the 2 * (num_captures + 1) capture offsets.
Handle<Object> RegExpImpl::JsreExecOnce(Handle<JSRegExp> regexp,
                                        int num_captures,
                                        Handle<String> subject,
                                        int previous_index,
                                        const uc16* two_byte_subject,
                                        int* offsets_vector,
                                        int offsets_vector_length) {
  int rc;
  {
    // Raw pointers into heap objects are used below, so no allocation may
    // happen inside this scope.
    AssertNoAllocation a;
    ByteArray* internal = JsreInternal(regexp);
    const JscreRegExp* js_regexp =
        reinterpret_cast<JscreRegExp*>(internal->GetDataStartAddress());

    LOG(RegExpExecEvent(regexp, previous_index, subject));

    rc = jsRegExpExecute(js_regexp,
                         two_byte_subject,
                         subject->length(),
                         previous_index,
                         offsets_vector,
                         offsets_vector_length);
  }

  // The KJS JavaScript engine returns null (ie, a failed match) when
  // JSRE's internal match limit is exceeded. We duplicate that behavior here.
  if (rc == JSRegExpErrorNoMatch || rc == JSRegExpErrorHitLimit) {
    return Factory::null_value();
  }

  // Any other JSRE error is reported by throwing an exception.
  if (rc < 0) {
    Handle<Object> error_code(Smi::FromInt(rc));
    Handle<Object> args[2] = {
      Factory::LookupAsciiSymbol("jsre_exec"), error_code
    };
    Handle<Object> regexp_err(
        Factory::NewTypeError("jsre_error", HandleVector(args, 2)));
    return Handle<Object>(Top::Throw(*regexp_err));
  }

  // The captures come in (start, end+1) pairs; copy all of them.
  Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
  for (int slot = 0; slot < 2 * (num_captures+1); slot++) {
    array->set(slot, Smi::FromInt(offsets_vector[slot]), SKIP_WRITE_BARRIER);
  }
  return Factory::NewJSArrayWithElements(array);
}
|
|
|
|
|
|
|
|
|
|
|
|
class OffsetsVector {
|
|
|
|
public:
|
|
|
|
inline OffsetsVector(int num_captures) {
|
|
|
|
offsets_vector_length_ = (num_captures + 1) * 3;
|
|
|
|
if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
|
|
|
|
vector_ = NewArray<int>(offsets_vector_length_);
|
|
|
|
} else {
|
|
|
|
vector_ = static_offsets_vector_;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
inline ~OffsetsVector() {
|
|
|
|
if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
|
|
|
|
DeleteArray(vector_);
|
|
|
|
vector_ = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
inline int* vector() {
|
|
|
|
return vector_;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
inline int length() {
|
|
|
|
return offsets_vector_length_;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
int* vector_;
|
|
|
|
int offsets_vector_length_;
|
|
|
|
static const int kStaticOffsetsVectorSize = 30;
|
|
|
|
static int static_offsets_vector_[kStaticOffsetsVectorSize];
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
int OffsetsVector::static_offsets_vector_[
|
|
|
|
OffsetsVector::kStaticOffsetsVectorSize];
|
|
|
|
|
|
|
|
|
2008-09-23 11:45:43 +00:00
|
|
|
// Runs the JSCRE program of |regexp| once against |subject|, starting at the
// position given by |index|, and returns the result of JsreExecOnce.
Handle<Object> RegExpImpl::JsreExec(Handle<JSRegExp> regexp,
                                    Handle<String> subject,
                                    Handle<Object> index) {
  // Prepare space for the return values.
  int capture_count = JsreCapture(regexp);
  OffsetsVector offsets(capture_count);

  int start = static_cast<int>(DoubleToInteger(index->Number()));

  // JSCRE works on two-byte data, so convert the subject first.
  Handle<String> subject16 = CachedStringToTwoByte(subject);

  return JsreExecOnce(regexp,
                      capture_count,
                      subject,
                      start,
                      subject16->GetTwoByteData(),
                      offsets.vector(),
                      offsets.length());
}
|
|
|
|
|
|
|
|
|
2008-09-23 11:45:43 +00:00
|
|
|
// Repeatedly runs the JSCRE program of |regexp| against |subject| and
// collects every match. Returns a JSArray of the per-match results when the
// search ends normally, or whatever non-array, non-null value JsreExecOnce
// returned (the exception case).
Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSRegExp> regexp,
                                          Handle<String> subject) {
  // Prepare space for the return values.
  int num_captures = JsreCapture(regexp);
  OffsetsVector offsets(num_captures);

  int previous_index = 0;
  Handle<JSArray> result = Factory::NewJSArray(0);
  int match_count = 0;
  Handle<Object> matches;

  Handle<String> subject16 = CachedStringToTwoByte(subject);

  for (;;) {
    if (previous_index > subject->length() || previous_index < 0) {
      // Per ECMA-262 15.10.6.2, if the previous index is greater than the
      // string length, there is no match.
      matches = Factory::null_value();
      break;
    }

    // The two-byte data pointer is fetched anew for every execution.
    matches = JsreExecOnce(regexp, num_captures, subject, previous_index,
                           subject16->GetTwoByteData(),
                           offsets.vector(), offsets.length());
    if (!matches->IsJSArray()) break;

    SetElement(result, match_count, matches);
    match_count++;

    // Resume after the end of this match; step one further after an empty
    // match so the search always advances.
    previous_index = offsets.vector()[1];
    if (offsets.vector()[0] == offsets.vector()[1]) {
      previous_index++;
    }
  }

  // A null value means the loop ended normally; anything else is the
  // exception result, which is passed on to the caller.
  if (!matches->IsNull()) {
    return matches;
  }
  return result;
}
|
|
|
|
|
|
|
|
|
2008-09-23 11:45:43 +00:00
|
|
|
// Returns the capture count stored at CAPTURE_INDEX in the JSCRE data of
// |re|.
int RegExpImpl::JsreCapture(Handle<JSRegExp> re) {
  FixedArray* jscre_data =
      FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
  Smi* capture_count = Smi::cast(jscre_data->get(CAPTURE_INDEX));
  return capture_count->value();
}
|
|
|
|
|
|
|
|
|
2008-09-23 11:45:43 +00:00
|
|
|
// Returns the ByteArray stored at INTERNAL_INDEX in the JSCRE data of |re|,
// i.e. the array holding the compiled JSCRE program.
ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) {
  FixedArray* jscre_data =
      FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
  return ByteArray::cast(jscre_data->get(INTERNAL_INDEX));
}
|
|
|
|
|
|
|
|
}} // namespace v8::internal
|