Separate native and interpreted regexp by a compile-time flag, not a runtime flag.

Clean-up of RegExp code.

Review URL: http://codereview.chromium.org/155085


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@2366 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
lrn@chromium.org 2009-07-07 08:11:19 +00:00
parent 1bee2d8a34
commit 72de7ab74e
12 changed files with 166 additions and 152 deletions

View File

@ -95,7 +95,12 @@ ANDROID_LINKFLAGS = ['-nostdlib',
LIBRARY_FLAGS = {
'all': {
'CPPDEFINES': ['ENABLE_LOGGING_AND_PROFILING'],
'CPPPATH': [join(root_dir, 'src')]
'CPPPATH': [join(root_dir, 'src')],
'regexp:native': {
'arch:ia32' : {
'CPPDEFINES': ['V8_NATIVE_REGEXP']
}
}
},
'gcc': {
'all': {
@ -546,6 +551,11 @@ SIMPLE_OPTIONS = {
'default': ARCH_GUESS,
'help': 'the architecture to build for (' + ARCH_GUESS + ')'
},
'regexp': {
'values': ['native', 'interpreted'],
'default': 'native',
'help': 'Whether to use native or interpreted regexp implementation'
},
'snapshot': {
'values': ['on', 'off', 'nobuild'],
'default': 'off',
@ -677,6 +687,8 @@ def VerifyOptions(env):
return False
if not IsLegal(env, 'sample', ["shell", "process"]):
return False
if not IsLegal(env, 'regexp', ["native", "interpreted"]):
return False
if env['os'] == 'win32' and env['library'] == 'shared' and env['prof'] == 'on':
Abort("Profiling on windows only supported for static library.")
if env['prof'] == 'oprofile' and env['os'] != 'linux':

View File

@ -208,8 +208,6 @@ DEFINE_bool(preemption, false,
// Regexp
DEFINE_bool(trace_regexps, false, "trace regexp execution")
DEFINE_bool(regexp_native, true,
"use native code regexp implementation (IA32 only)")
DEFINE_bool(regexp_optimization, true, "generate optimized regexp code")
// Testing flags test/cctest/test-{flags,api,serialization}.cc

View File

@ -263,7 +263,6 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
// Irregexp implementation.
// Ensures that the regexp object contains a compiled version of the
// source for either ASCII or non-ASCII strings.
// If the compiled version doesn't already exist, it is compiled
@ -271,25 +270,26 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
// If compilation fails, an exception is thrown and this function
// returns false.
bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) {
  // Fast path: the slot for the requested encoding already holds compiled
  // code of the kind this build executes (native Code when V8_NATIVE_REGEXP
  // is defined, interpreter bytecode otherwise).
  Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii));
#ifdef V8_NATIVE_REGEXP
  if (compiled_code->IsCode()) return true;
#else  // ! V8_NATIVE_REGEXP (RegExp interpreter code)
  if (compiled_code->IsByteArray()) return true;
#endif
  // Every other state is delegated to CompileIrregexp, which re-throws an
  // error object stored by an earlier failed compilation (returning false)
  // or compiles the pattern for this encoding.
  return CompileIrregexp(re, is_ascii);
}
bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) {
// Compile the RegExp.
CompilationZoneScope zone_scope(DELETE_ON_EXIT);
Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));
if (entry->IsJSObject()) {
// If it's a JSObject, a previous compilation failed and threw this object.
// Re-throw the object without trying again.
Top::Throw(entry);
return false;
}
ASSERT(entry->IsTheHole());
JSRegExp::Flags flags = re->GetFlags();
@ -302,7 +302,7 @@ bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) {
FlatStringReader reader(pattern);
if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) {
// Throw an exception if we fail to parse the pattern.
// THIS SHOULD NOT HAPPEN. We already parsed it successfully once.
// THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
ThrowRegExpException(re,
pattern,
compile_data.error,
@ -325,17 +325,15 @@ bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) {
Handle<Object> regexp_err =
Factory::NewSyntaxError("malformed_regexp", array);
Top::Throw(*regexp_err);
re->SetDataAt(index, *regexp_err);
re->SetDataAt(JSRegExp::code_index(is_ascii), *regexp_err);
return false;
}
NoHandleAllocation no_handles;
FixedArray* data = FixedArray::cast(re->data());
data->set(index, result.code);
int register_max = IrregexpMaxRegisterCount(data);
Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));
data->set(JSRegExp::code_index(is_ascii), result.code);
int register_max = IrregexpMaxRegisterCount(*data);
if (result.num_registers > register_max) {
SetIrregexpMaxRegisterCount(data, result.num_registers);
SetIrregexpMaxRegisterCount(*data, result.num_registers);
}
return true;
@ -364,24 +362,12 @@ int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) {
// Returns the interpreter bytecode stored in the regexp data array |re| for
// the requested subject encoding: the ASCII slot when |is_ascii| is true,
// the UC16 slot otherwise. The slot is selected with JSRegExp::code_index so
// that it always agrees with the slot written at compile time.
ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) {
  return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii)));
}
// Returns the native code object stored in the regexp data array |re| for
// the requested subject encoding: the ASCII slot when |is_ascii| is true,
// the UC16 slot otherwise. The slot is selected with JSRegExp::code_index so
// that it always agrees with the slot written at compile time.
Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {
  return Code::cast(re->get(JSRegExp::code_index(is_ascii)));
}
@ -408,12 +394,14 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
int number_of_capture_registers =
(IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
#ifndef V8_NATIVE_REGEXP
#ifdef DEBUG
if (FLAG_trace_regexp_bytecodes) {
String* pattern = jsregexp->Pattern();
PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
}
#endif
#endif
if (!subject->IsFlat()) {
@ -422,88 +410,83 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
bool rc;
// We have to initialize this with something to make gcc happy but we can't
// initialize it with its real value until after the GC-causing things are
// over.
FixedArray* array = NULL;
Handle<FixedArray> array;
// Dispatch to the correct RegExp implementation.
Handle<String> original_subject = subject;
Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data()));
if (UseNativeRegexp()) {
#ifdef V8_NATIVE_REGEXP
#if V8_TARGET_ARCH_IA32
OffsetsVector captures(number_of_capture_registers);
int* captures_vector = captures.vector();
RegExpMacroAssemblerIA32::Result res;
do {
bool is_ascii = subject->IsAsciiRepresentation();
if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
return Handle<Object>::null();
}
Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii));
res = RegExpMacroAssemblerIA32::Match(code,
subject,
captures_vector,
captures.length(),
previous_index);
// If result is RETRY, the string has changed representation, and we
// must restart from scratch.
} while (res == RegExpMacroAssemblerIA32::RETRY);
if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
ASSERT(Top::has_pending_exception());
return Handle<Object>::null();
}
ASSERT(res == RegExpMacroAssemblerIA32::SUCCESS
|| res == RegExpMacroAssemblerIA32::FAILURE);
rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
if (!rc) return Factory::null_value();
array = last_match_info->elements();
ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
// The captures come in (start, end+1) pairs.
for (int i = 0; i < number_of_capture_registers; i += 2) {
SetCapture(array, i, captures_vector[i]);
SetCapture(array, i + 1, captures_vector[i + 1]);
}
#else // !V8_TARGET_ARCH_IA32
UNREACHABLE();
#endif
} else {
OffsetsVector captures(number_of_capture_registers);
int* captures_vector = captures.vector();
RegExpMacroAssemblerIA32::Result res;
do {
bool is_ascii = subject->IsAsciiRepresentation();
if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
return Handle<Object>::null();
}
// Now that we have done EnsureCompiledIrregexp we can get the number of
// registers.
int number_of_registers =
IrregexpNumberOfRegisters(FixedArray::cast(jsregexp->data()));
OffsetsVector registers(number_of_registers);
int* register_vector = registers.vector();
for (int i = number_of_capture_registers - 1; i >= 0; i--) {
register_vector[i] = -1;
}
Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii));
Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii));
res = RegExpMacroAssemblerIA32::Match(code,
subject,
captures_vector,
captures.length(),
previous_index);
// If result is RETRY, the string has changed representation, and we
// must restart from scratch.
} while (res == RegExpMacroAssemblerIA32::RETRY);
if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
ASSERT(Top::has_pending_exception());
return Handle<Object>::null();
}
ASSERT(res == RegExpMacroAssemblerIA32::SUCCESS
|| res == RegExpMacroAssemblerIA32::FAILURE);
rc = IrregexpInterpreter::Match(byte_codes,
subject,
register_vector,
previous_index);
if (!rc) return Factory::null_value();
if (res != RegExpMacroAssemblerIA32::SUCCESS) return Factory::null_value();
array = last_match_info->elements();
ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
// The captures come in (start, end+1) pairs.
for (int i = 0; i < number_of_capture_registers; i += 2) {
SetCapture(array, i, register_vector[i]);
SetCapture(array, i + 1, register_vector[i + 1]);
}
array = Handle<FixedArray>(last_match_info->elements());
ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
// The captures come in (start, end+1) pairs.
for (int i = 0; i < number_of_capture_registers; i += 2) {
SetCapture(*array, i, captures_vector[i]);
SetCapture(*array, i + 1, captures_vector[i + 1]);
}
#else // !V8_TARGET_ARCH_IA32
UNREACHABLE();
#endif // V8_TARGET_ARCH_IA32
#else // !V8_NATIVE_REGEXP
bool is_ascii = subject->IsAsciiRepresentation();
if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
return Handle<Object>::null();
}
// Now that we have done EnsureCompiledIrregexp we can get the number of
// registers.
int number_of_registers =
IrregexpNumberOfRegisters(FixedArray::cast(jsregexp->data()));
OffsetsVector registers(number_of_registers);
int* register_vector = registers.vector();
for (int i = number_of_capture_registers - 1; i >= 0; i--) {
register_vector[i] = -1;
}
Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii));
if (!IrregexpInterpreter::Match(byte_codes,
subject,
register_vector,
previous_index)) {
return Factory::null_value();
}
SetLastCaptureCount(array, number_of_capture_registers);
SetLastSubject(array, *original_subject);
SetLastInput(array, *original_subject);
array = Handle<FixedArray>(last_match_info->elements());
ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
// The captures come in (start, end+1) pairs.
for (int i = 0; i < number_of_capture_registers; i += 2) {
SetCapture(*array, i, register_vector[i]);
SetCapture(*array, i + 1, register_vector[i + 1]);
}
#endif // V8_NATIVE_REGEXP
SetLastCaptureCount(*array, number_of_capture_registers);
SetLastSubject(*array, *subject);
SetLastInput(*array, *subject);
return last_match_info;
}
@ -4474,35 +4457,38 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data,
NodeInfo info = *node->info();
if (RegExpImpl::UseNativeRegexp()) {
#ifdef V8_NATIVE_REGEXP
#ifdef V8_TARGET_ARCH_ARM
UNREACHABLE();
// ARM native regexp not implemented yet.
UNREACHABLE();
#endif
#ifdef V8_TARGET_ARCH_X64
UNREACHABLE();
// X64 native regexp not implemented yet.
UNREACHABLE();
#endif
#ifdef V8_TARGET_ARCH_IA32
RegExpMacroAssemblerIA32::Mode mode;
if (is_ascii) {
mode = RegExpMacroAssemblerIA32::ASCII;
} else {
mode = RegExpMacroAssemblerIA32::UC16;
}
RegExpMacroAssemblerIA32 macro_assembler(mode,
(data->capture_count + 1) * 2);
return compiler.Assemble(&macro_assembler,
node,
data->capture_count,
pattern);
#endif
RegExpMacroAssemblerIA32::Mode mode;
if (is_ascii) {
mode = RegExpMacroAssemblerIA32::ASCII;
} else {
mode = RegExpMacroAssemblerIA32::UC16;
}
RegExpMacroAssemblerIA32 macro_assembler(mode,
(data->capture_count + 1) * 2);
return compiler.Assemble(&macro_assembler,
node,
data->capture_count,
pattern);
#endif
#else // ! V8_NATIVE_REGEXP
// Interpreted regexp.
EmbeddedVector<byte, 1024> codes;
RegExpMacroAssemblerIrregexp macro_assembler(codes);
return compiler.Assemble(&macro_assembler,
node,
data->capture_count,
pattern);
#endif // V8_NATIVE_REGEXP
}
}} // namespace v8::internal

View File

@ -37,13 +37,15 @@ class RegExpMacroAssembler;
class RegExpImpl {
public:
static inline bool UseNativeRegexp() {
#ifdef V8_TARGET_ARCH_IA32
return FLAG_regexp_native;
// Whether V8 is compiled with native regexp support or not.
static bool UsesNativeRegExp() {
#ifdef V8_NATIVE_REGEXP
return true;
#else
return false;
return false;
#endif
}
// Creates a regular expression literal in the old space.
// This function calls the garbage collector if necessary.
static Handle<Object> CreateRegExpLiteral(Handle<JSFunction> constructor,
@ -148,7 +150,8 @@ class RegExpImpl {
static String* last_ascii_string_;
static String* two_byte_cached_string_;
static bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii);
static bool CompileIrregexp(Handle<JSRegExp> re, bool is_ascii);
static inline bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii);
// Set the subject cache. The previous string buffer is not deleted, so the

View File

@ -714,7 +714,7 @@ void JSRegExp::JSRegExpVerify() {
break;
}
case JSRegExp::IRREGEXP: {
bool is_native = RegExpImpl::UseNativeRegexp();
bool is_native = RegExpImpl::UsesNativeRegExp();
FixedArray* arr = FixedArray::cast(data());
Object* ascii_data = arr->get(JSRegExp::kIrregexpASCIICodeIndex);

View File

@ -3270,6 +3270,9 @@ class JSRegExp: public JSObject {
inline Object* DataAt(int index);
// Set implementation data after the object has been prepared.
inline void SetDataAt(int index, Object* value);
// Maps a subject-string encoding to the index of the data slot holding the
// Irregexp code for that encoding: the ASCII slot when |is_ascii| is true,
// the UC16 slot otherwise.
static int code_index(bool is_ascii) {
  if (is_ascii) {
    return kIrregexpASCIICodeIndex;
  }
  return kIrregexpUC16CodeIndex;
}
static inline JSRegExp* cast(Object* obj);

View File

@ -1016,16 +1016,16 @@ static Object* Runtime_RegExpExec(Arguments args) {
ASSERT(args.length() == 4);
CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
CONVERT_ARG_CHECKED(String, subject, 1);
// Due to the way the JS files are constructed this must be less than the
// Due to the way the JS calls are constructed this must be less than the
// length of a string, i.e. it is always a Smi. We check anyway for security.
CONVERT_CHECKED(Smi, index, args[2]);
CONVERT_SMI_CHECKED(index, args[2]);
CONVERT_ARG_CHECKED(JSArray, last_match_info, 3);
RUNTIME_ASSERT(last_match_info->HasFastElements());
RUNTIME_ASSERT(index->value() >= 0);
RUNTIME_ASSERT(index->value() <= subject->length());
RUNTIME_ASSERT(index >= 0);
RUNTIME_ASSERT(index <= subject->length());
Handle<Object> result = RegExpImpl::Exec(regexp,
subject,
index->value(),
index,
last_match_info);
if (result.is_null()) return Failure::Exception();
return *result;

View File

@ -4892,6 +4892,7 @@ TEST(DebugBreakInMessageHandler) {
}
#ifdef V8_NATIVE_REGEXP
// Debug event handler which gets the function on the top frame and schedules a
// break a number of times.
static void DebugEventDebugBreak(
@ -4928,11 +4929,10 @@ static void DebugEventDebugBreak(
TEST(RegExpDebugBreak) {
// This test only applies to native regexps.
v8::HandleScope scope;
DebugLocalContext env;
i::FLAG_regexp_native = true;
// Create a function for checking the function when hitting a break point.
frame_function_name = CompileFunction(&env,
frame_function_name_source,
@ -4957,6 +4957,7 @@ TEST(RegExpDebugBreak) {
CHECK_EQ(20, break_point_hit_count);
CHECK_EQ("exec", last_function_hit);
}
#endif // V8_NATIVE_REGEXP
// Common part of EvalContextData and NestedBreakEventContextData tests.

View File

@ -597,6 +597,8 @@ TEST(DispatchTableConstruction) {
}
// Tests of interpreter.
TEST(MacroAssembler) {
V8::Initialize(NULL);
byte codes[1024];
@ -660,8 +662,8 @@ TEST(MacroAssembler) {
CHECK_EQ(42, captures[0]);
}
#ifdef V8_TARGET_ARCH_IA32 // IA32 only tests.
#ifdef V8_TARGET_ARCH_IA32 // IA32 Native Regexp only tests.
#ifdef V8_NATIVE_REGEXP
class ContextInitializer {
public:
@ -1284,10 +1286,10 @@ TEST(MacroAssemblerIA32LotsOfRegisters) {
Top::clear_pending_exception();
}
#endif // V8_NATIVE_REGEXP
#endif // V8_TARGET_ARCH_IA32
#endif // !defined ARM
TEST(AddInverseToTable) {
static const int kLimit = 1000;
static const int kRangeCount = 16;

View File

@ -419,7 +419,8 @@
'target_name': 'v8_base',
'type': '<(library)',
'defines': [
'V8_TARGET_ARCH_IA32'
'V8_TARGET_ARCH_IA32',
'V8_NATIVE_REGEXP'
],
'include_dirs+': [
'../../src',
@ -477,7 +478,8 @@
'target_name': 'v8_nosnapshot',
'type': '<(library)',
'defines': [
'V8_TARGET_ARCH_IA32'
'V8_TARGET_ARCH_IA32',
'V8_NATIVE_REGEXP'
],
'dependencies': [
'js2c',
@ -509,7 +511,8 @@
'target_name': 'v8',
'type': '<(library)',
'defines': [
'V8_TARGET_ARCH_IA32'
'V8_TARGET_ARCH_IA32',
'V8_NATIVE_REGEXP'
],
'dependencies': [
'js2c',
@ -549,7 +552,8 @@
'target_name': 'v8_shell',
'type': 'executable',
'defines': [
'V8_TARGET_ARCH_IA32'
'V8_TARGET_ARCH_IA32',
'V8_NATIVE_REGEXP'
],
'dependencies': [
'v8',
@ -579,7 +583,8 @@
'v8',
],
'defines': [
'V8_TARGET_ARCH_IA32'
'V8_TARGET_ARCH_IA32',
'V8_NATIVE_REGEXP'
],
'include_dirs': [
'../../src',

View File

@ -1449,6 +1449,7 @@
GCC_PREPROCESSOR_DEFINITIONS = (
"$(GCC_PREPROCESSOR_DEFINITIONS)",
V8_TARGET_ARCH_IA32,
V8_NATIVE_REGEXP,
DEBUG,
);
HEADER_SEARCH_PATHS = ../src;
@ -1462,6 +1463,7 @@
GCC_PREPROCESSOR_DEFINITIONS = (
"$(GCC_PREPROCESSOR_DEFINITIONS)",
V8_TARGET_ARCH_IA32,
V8_NATIVE_REGEXP,
NDEBUG,
);
HEADER_SEARCH_PATHS = ../src;
@ -1477,6 +1479,7 @@
"$(GCC_PREPROCESSOR_DEFINITIONS)",
ENABLE_DISASSEMBLER,
V8_TARGET_ARCH_IA32,
V8_NATIVE_REGEXP,
ENABLE_LOGGING_AND_PROFILING,
);
HEADER_SEARCH_PATHS = ../src;
@ -1492,6 +1495,7 @@
GCC_PREPROCESSOR_DEFINITIONS = (
"$(GCC_PREPROCESSOR_DEFINITIONS)",
V8_TARGET_ARCH_IA32,
V8_NATIVE_REGEXP,
NDEBUG,
);
HEADER_SEARCH_PATHS = ../src;

View File

@ -6,6 +6,6 @@
>
<Tool
Name="VCCLCompilerTool"
PreprocessorDefinitions="V8_TARGET_ARCH_IA32"
PreprocessorDefinitions="V8_TARGET_ARCH_IA32;V8_NATIVE_REGEXP"
/>
</VisualStudioPropertySheet>