// Copyright 2006-2008 the V8 project authors. All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "v8.h" #include "execution.h" #include "factory.h" #include "jsregexp.h" #include "platform.h" #include "runtime.h" #include "top.h" #include "compilation-cache.h" // Including pcre.h undefines DEBUG to avoid getting debug output from // the JSCRE implementation. Make sure to redefine it in debug mode // after having included the header file. #ifdef DEBUG #include "third_party/jscre/pcre.h" #define DEBUG #else #include "third_party/jscre/pcre.h" #endif namespace v8 { namespace internal { #define CAPTURE_INDEX 0 #define INTERNAL_INDEX 1 static Failure* malloc_failure; static void* JSREMalloc(size_t size) { Object* obj = Heap::AllocateByteArray(size); // If allocation failed, return a NULL pointer to JSRE, and jsRegExpCompile // will return NULL to the caller, performs GC there. // Also pass failure information to the caller. if (obj->IsFailure()) { malloc_failure = Failure::cast(obj); return NULL; } // Note: object is unrooted, the caller of jsRegExpCompile must // create a handle for the return value before doing heap allocation. return reinterpret_cast(ByteArray::cast(obj)->GetDataStartAddress()); } static void JSREFree(void* p) { USE(p); // Do nothing, memory is garbage collected. } String* RegExpImpl::last_ascii_string_ = NULL; String* RegExpImpl::two_byte_cached_string_ = NULL; void RegExpImpl::NewSpaceCollectionPrologue() { // The two byte string is always in the old space. The Ascii string may be // in either place. If it is in the old space we don't need to do anything. if (Heap::InNewSpace(last_ascii_string_)) { // Invalidate the cache. last_ascii_string_ = NULL; two_byte_cached_string_ = NULL; } } void RegExpImpl::OldSpaceCollectionPrologue() { last_ascii_string_ = NULL; two_byte_cached_string_ = NULL; } Handle RegExpImpl::CreateRegExpLiteral(Handle constructor, Handle pattern, Handle flags, bool* has_pending_exception) { // Ensure that the constructor function has been loaded. if (!constructor->IsLoaded()) { LoadLazy(constructor, has_pending_exception); if (*has_pending_exception) return Handle(Failure::Exception()); } // Call the construct code with 2 arguments. Object** argv[2] = { Handle::cast(pattern).location(), Handle::cast(flags).location() }; return Execution::New(constructor, 2, argv, has_pending_exception); } // Converts a source string to a 16 bit flat string or a SlicedString containing // a 16 bit flat string). Handle RegExpImpl::CachedStringToTwoByte(Handle subject) { if (*subject == last_ascii_string_) { ASSERT(two_byte_cached_string_ != NULL); return Handle(String::cast(two_byte_cached_string_)); } Handle two_byte_string = StringToTwoByte(subject); last_ascii_string_ = *subject; two_byte_cached_string_ = *two_byte_string; return two_byte_string; } // Converts a source string to a 16 bit flat string or a SlicedString containing // a 16 bit flat string). Handle RegExpImpl::StringToTwoByte(Handle pattern) { if (!pattern->IsFlat()) { FlattenString(pattern); } Handle flat_string(pattern->IsConsString() ? String::cast(ConsString::cast(*pattern)->first()) : *pattern); ASSERT(!flat_string->IsConsString()); ASSERT(flat_string->IsSeqString() || flat_string->IsSlicedString() || flat_string->IsExternalString()); if (!flat_string->IsAsciiRepresentation()) { return flat_string; } Handle two_byte_string = Factory::NewRawTwoByteString(flat_string->length(), TENURED); static StringInputBuffer convert_to_two_byte_buffer; convert_to_two_byte_buffer.Reset(*flat_string); for (int i = 0; convert_to_two_byte_buffer.has_more(); i++) { two_byte_string->Set(i, convert_to_two_byte_buffer.GetNext()); } return two_byte_string; } static JSRegExp::Flags RegExpFlagsFromString(Handle str) { int flags = JSRegExp::NONE; for (int i = 0; i < str->length(); i++) { switch (str->Get(i)) { case 'i': flags |= JSRegExp::IGNORE_CASE; break; case 'g': flags |= JSRegExp::GLOBAL; break; case 'm': flags |= JSRegExp::MULTILINE; break; } } return JSRegExp::Flags(flags); } unibrow::Predicate is_reg_exp_special_char; Handle RegExpImpl::Compile(Handle re, Handle pattern, Handle flag_str) { JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); Handle cached = CompilationCache::LookupRegExp(pattern, flags); bool in_cache = !cached.is_null(); Handle result; if (in_cache) { re->set_data(*cached); result = re; } else { bool is_atom = !flags.is_ignore_case(); for (int i = 0; is_atom && i < pattern->length(); i++) { if (is_reg_exp_special_char.get(pattern->Get(i))) is_atom = false; } if (is_atom) { result = AtomCompile(re, pattern, flags); } else { result = JsreCompile(re, pattern, flags); } Object* data = re->data(); if (data->IsFixedArray()) { // If compilation succeeded then the data is set on the regexp // and we can store it in the cache. Handle data(FixedArray::cast(re->data())); CompilationCache::PutRegExp(pattern, flags, data); } } LOG(RegExpCompileEvent(re, in_cache)); return result; } Handle RegExpImpl::Exec(Handle regexp, Handle subject, Handle index) { switch (regexp->TypeTag()) { case JSRegExp::JSCRE: return JsreExec(regexp, subject, index); case JSRegExp::ATOM: return AtomExec(regexp, subject, index); default: UNREACHABLE(); return Handle(); } } Handle RegExpImpl::ExecGlobal(Handle regexp, Handle subject) { switch (regexp->TypeTag()) { case JSRegExp::JSCRE: return JsreExecGlobal(regexp, subject); case JSRegExp::ATOM: return AtomExecGlobal(regexp, subject); default: UNREACHABLE(); return Handle(); } } Handle RegExpImpl::AtomCompile(Handle re, Handle pattern, JSRegExp::Flags flags) { Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, pattern); return re; } Handle RegExpImpl::AtomExec(Handle re, Handle subject, Handle index) { Handle needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); uint32_t start_index; if (!Array::IndexFromObject(*index, &start_index)) { return Handle(Smi::FromInt(-1)); } LOG(RegExpExecEvent(re, start_index, subject)); int value = Runtime::StringMatch(subject, needle, start_index); if (value == -1) return Factory::null_value(); Handle array = Factory::NewFixedArray(2); array->set(0, Smi::FromInt(value), SKIP_WRITE_BARRIER); array->set(1, Smi::FromInt(value + needle->length()), SKIP_WRITE_BARRIER); return Factory::NewJSArrayWithElements(array); } Handle RegExpImpl::AtomExecGlobal(Handle re, Handle subject) { Handle needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); Handle result = Factory::NewJSArray(1); int index = 0; int match_count = 0; int subject_length = subject->length(); int needle_length = needle->length(); while (true) { LOG(RegExpExecEvent(re, index, subject)); int value = -1; if (index + needle_length <= subject_length) { value = Runtime::StringMatch(subject, needle, index); } if (value == -1) break; HandleScope scope; int end = value + needle_length; Handle array = Factory::NewFixedArray(2); array->set(0, Smi::FromInt(value), SKIP_WRITE_BARRIER); array->set(1, Smi::FromInt(end), SKIP_WRITE_BARRIER); Handle pair = Factory::NewJSArrayWithElements(array); SetElement(result, match_count, pair); match_count++; index = end; if (needle_length == 0) index++; } return result; } static inline Object* DoCompile(String* pattern, JSRegExp::Flags flags, unsigned* number_of_captures, const char** error_message, JscreRegExp** code) { JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase; JSRegExpMultilineOption multiline_option = flags.is_multiline() ? JSRegExpMultiline : JSRegExpSingleLine; *error_message = NULL; malloc_failure = Failure::Exception(); *code = jsRegExpCompile(pattern->GetTwoByteData(), pattern->length(), case_option, multiline_option, number_of_captures, error_message, &JSREMalloc, &JSREFree); if (*code == NULL && (malloc_failure->IsRetryAfterGC() || malloc_failure->IsOutOfMemoryFailure())) { return malloc_failure; } else { // It doesn't matter which object we return here, we just need to return // a non-failure to indicate to the GC-retry code that there was no // allocation failure. return pattern; } } void CompileWithRetryAfterGC(Handle pattern, JSRegExp::Flags flags, unsigned* number_of_captures, const char** error_message, JscreRegExp** code) { CALL_HEAP_FUNCTION_VOID(DoCompile(*pattern, flags, number_of_captures, error_message, code)); } Handle RegExpImpl::JsreCompile(Handle re, Handle pattern, JSRegExp::Flags flags) { Handle two_byte_pattern = StringToTwoByte(pattern); unsigned number_of_captures; const char* error_message = NULL; JscreRegExp* code = NULL; FlattenString(pattern); CompileWithRetryAfterGC(two_byte_pattern, flags, &number_of_captures, &error_message, &code); if (code == NULL) { // Throw an exception. Handle array = Factory::NewJSArray(2); SetElement(array, 0, pattern); SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector( (error_message == NULL) ? "Unknown regexp error" : error_message))); Handle regexp_err = Factory::NewSyntaxError("malformed_regexp", array); return Handle(Top::Throw(*regexp_err)); } // Convert the return address to a ByteArray pointer. Handle internal( ByteArray::FromDataStartAddress(reinterpret_cast
(code))); Handle value = Factory::NewFixedArray(2); value->set(CAPTURE_INDEX, Smi::FromInt(number_of_captures)); value->set(INTERNAL_INDEX, *internal); Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value); return re; } Handle RegExpImpl::JsreExecOnce(Handle regexp, int num_captures, Handle subject, int previous_index, const uc16* two_byte_subject, int* offsets_vector, int offsets_vector_length) { int rc; { AssertNoAllocation a; ByteArray* internal = JsreInternal(regexp); const JscreRegExp* js_regexp = reinterpret_cast(internal->GetDataStartAddress()); LOG(RegExpExecEvent(regexp, previous_index, subject)); rc = jsRegExpExecute(js_regexp, two_byte_subject, subject->length(), previous_index, offsets_vector, offsets_vector_length); } // The KJS JavaScript engine returns null (ie, a failed match) when // JSRE's internal match limit is exceeded. We duplicate that behavior here. if (rc == JSRegExpErrorNoMatch || rc == JSRegExpErrorHitLimit) { return Factory::null_value(); } // Other JSRE errors: if (rc < 0) { // Throw an exception. Handle code(Smi::FromInt(rc)); Handle args[2] = { Factory::LookupAsciiSymbol("jsre_exec"), code }; Handle regexp_err( Factory::NewTypeError("jsre_error", HandleVector(args, 2))); return Handle(Top::Throw(*regexp_err)); } Handle array = Factory::NewFixedArray(2 * (num_captures+1)); // The captures come in (start, end+1) pairs. for (int i = 0; i < 2 * (num_captures+1); i += 2) { array->set(i, Smi::FromInt(offsets_vector[i]), SKIP_WRITE_BARRIER); array->set(i+1, Smi::FromInt(offsets_vector[i+1]), SKIP_WRITE_BARRIER); } return Factory::NewJSArrayWithElements(array); } class OffsetsVector { public: inline OffsetsVector(int num_captures) { offsets_vector_length_ = (num_captures + 1) * 3; if (offsets_vector_length_ > kStaticOffsetsVectorSize) { vector_ = NewArray(offsets_vector_length_); } else { vector_ = static_offsets_vector_; } } inline ~OffsetsVector() { if (offsets_vector_length_ > kStaticOffsetsVectorSize) { DeleteArray(vector_); vector_ = NULL; } } inline int* vector() { return vector_; } inline int length() { return offsets_vector_length_; } private: int* vector_; int offsets_vector_length_; static const int kStaticOffsetsVectorSize = 30; static int static_offsets_vector_[kStaticOffsetsVectorSize]; }; int OffsetsVector::static_offsets_vector_[ OffsetsVector::kStaticOffsetsVectorSize]; Handle RegExpImpl::JsreExec(Handle regexp, Handle subject, Handle index) { // Prepare space for the return values. int num_captures = JsreCapture(regexp); OffsetsVector offsets(num_captures); int previous_index = static_cast(DoubleToInteger(index->Number())); Handle subject16 = CachedStringToTwoByte(subject); Handle result(JsreExecOnce(regexp, num_captures, subject, previous_index, subject16->GetTwoByteData(), offsets.vector(), offsets.length())); return result; } Handle RegExpImpl::JsreExecGlobal(Handle regexp, Handle subject) { // Prepare space for the return values. int num_captures = JsreCapture(regexp); OffsetsVector offsets(num_captures); int previous_index = 0; Handle result = Factory::NewJSArray(0); int i = 0; Handle matches; Handle subject16 = CachedStringToTwoByte(subject); do { if (previous_index > subject->length() || previous_index < 0) { // Per ECMA-262 15.10.6.2, if the previous index is greater than the // string length, there is no match. matches = Factory::null_value(); } else { matches = JsreExecOnce(regexp, num_captures, subject, previous_index, subject16->GetTwoByteData(), offsets.vector(), offsets.length()); if (matches->IsJSArray()) { SetElement(result, i, matches); i++; previous_index = offsets.vector()[1]; if (offsets.vector()[0] == offsets.vector()[1]) { previous_index++; } } } } while (matches->IsJSArray()); // If we exited the loop with an exception, throw it. if (matches->IsNull()) { // Exited loop normally. return result; } else { // Exited loop with the exception in matches. return matches; } } int RegExpImpl::JsreCapture(Handle re) { FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); return Smi::cast(value->get(CAPTURE_INDEX))->value(); } ByteArray* RegExpImpl::JsreInternal(Handle re) { FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); return ByteArray::cast(value->get(INTERNAL_INDEX)); } }} // namespace v8::internal