Moved String.prototype.match implementation to C++.

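Also added extra runtime assertions that check the regexp start index against the subject length and that the last-match-info array has fast elements.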


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1608 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
lrn@chromium.org 2009-03-25 12:14:10 +00:00
parent 7c35a5d820
commit bd8816efb0
8 changed files with 97 additions and 232 deletions

View File

@ -205,25 +205,6 @@ Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
}
// Runs a global match of |regexp| against |subject| by dispatching on the
// regexp's type tag to the atom or the Irregexp implementation. Whatever the
// selected implementation produces is returned unchanged.
Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
                                      Handle<String> subject,
                                      Handle<JSArray> last_match_info) {
  switch (regexp->TypeTag()) {
    case JSRegExp::IRREGEXP: {
      Handle<Object> irregexp_result =
          IrregexpExecGlobal(regexp, subject, last_match_info);
      // An empty handle is only acceptable while an exception is pending.
      ASSERT(!irregexp_result.is_null() || Top::has_pending_exception());
      return irregexp_result;
    }
    case JSRegExp::ATOM:
      return AtomExecGlobal(regexp, subject, last_match_info);
    default:
      // No other type tag is expected here.
      UNREACHABLE();
      return Handle<Object>::null();
  }
}
// RegExp Atom implementation: Simple string search using indexOf.
@ -273,55 +254,6 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
}
// Global match for an atom regexp: finds successive occurrences of the
// pattern string stored in |re| (data slot kAtomPatternIndex) inside
// |subject| using Runtime::StringMatch.
//
// Returns a JSArray with one entry per occurrence; each entry is a JSArray
// laid out like the last-match-info array, with capture 0 set to the start
// and capture 1 set to the end of the occurrence.
// If at least one occurrence was found, the last one is also recorded in
// |last_match_info| via SetAtomLastCapture before returning.
Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
Handle<String> subject,
Handle<JSArray> last_match_info) {
Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
ASSERT(last_match_info->HasFastElements());
Handle<JSArray> result = Factory::NewJSArray(1);
// |index| is where the next search starts; |last_value| is the start of the
// most recent occurrence, or -1 if none has been found yet.
int index = 0;
int match_count = 0;
int subject_length = subject->length();
int needle_length = needle->length();
int last_value = -1;
while (true) {
// Fresh handle scope for every iteration of the search loop.
HandleScope scope;
int value = -1;
// Only search while the needle can still fit in the rest of the subject.
if (index + needle_length <= subject_length) {
value = Runtime::StringMatch(subject, needle, index);
}
// -1 is treated as "no further occurrence": publish the last occurrence
// (if there was one) to |last_match_info| and stop.
if (value == -1) {
if (last_value != -1) {
Handle<FixedArray> array(last_match_info->elements());
SetAtomLastCapture(*array,
*subject,
last_value,
last_value + needle->length());
}
break;
}
int end = value + needle_length;
// Create an array that looks like the static last_match_info array
// that is attached to the global RegExp object. We will be returning
// an array of these.
Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2);
SetLastCaptureCount(*array, 2);
// Ignore subject and input fields.
SetCapture(*array, 0, value);
SetCapture(*array, 1, end);
Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
SetElement(result, match_count, pair);
match_count++;
index = end;
// An empty needle yields a zero-length occurrence (end == value); step one
// position forward so the next search does not start at the same index.
if (needle_length == 0) index++;
last_value = value;
}
return result;
}
// Irregexp implementation.
@ -331,8 +263,7 @@ Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
// from the source pattern.
// If compilation fails, an exception is thrown and this function
// returns false.
bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re,
bool is_ascii) {
bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) {
int index;
if (is_ascii) {
index = JSRegExp::kIrregexpASCIICodeIndex;
@ -460,22 +391,20 @@ void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
}
Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
Handle<String> subject,
int index,
int previous_index,
Handle<JSArray> last_match_info) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP);
// Prepare space for the return values.
int number_of_capture_registers =
(IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
(IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
OffsetsVector offsets(number_of_capture_registers);
int previous_index = index;
#ifdef DEBUG
if (FLAG_trace_regexp_bytecodes) {
String* pattern = regexp->Pattern();
String* pattern = jsregexp->Pattern();
PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
}
@ -487,104 +416,12 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
return IrregexpExecOnce(regexp,
number_of_capture_registers,
last_match_info,
subject,
previous_index,
offsets.vector(),
offsets.length());
}
// Repeatedly runs the Irregexp pattern in |regexp| against |subject|,
// starting at index 0, and collects one array of capture offsets per match.
//
// Returns:
//   - a JSArray holding one JSArray per match (empty if nothing matched), or
//   - an empty handle if a match attempt returned an empty handle (an
//     exception is then expected to be pending).
// |subject| is flattened if necessary and |last_match_info| is grown up front;
// both are handed to every match attempt.
Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
                                              Handle<String> subject,
                                              Handle<JSArray> last_match_info) {
  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);

  // Prepare space for the return values.
  int number_of_capture_registers =
      (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
  OffsetsVector offsets(number_of_capture_registers);

  int previous_index = 0;

  Handle<JSArray> result = Factory::NewJSArray(0);
  int result_length = 0;
  Handle<Object> matches;

  if (!subject->IsFlat()) {
    FlattenString(subject);
  }

  last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);

  while (true) {
    if (previous_index > subject->length() || previous_index < 0) {
      // Per ECMA-262 15.10.6.2, if the previous index is greater than the
      // string length, there is no match.
      return result;
    }

#ifdef DEBUG
    if (FLAG_trace_regexp_bytecodes) {
      String* pattern = regexp->Pattern();
      PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
      PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
    }
#endif

    // Handles created while processing one match are scoped to this iteration.
    HandleScope scope;
    matches = IrregexpExecOnce(regexp,
                               number_of_capture_registers,
                               last_match_info,
                               subject,
                               previous_index,
                               offsets.vector(),
                               offsets.length());

    if (matches.is_null()) {
      ASSERT(Top::has_pending_exception());
      return matches;
    }

    if (!matches->IsJSArray()) {
      // Anything that is not a match array must be the null value: no more
      // matches, so return what has been collected so far.
      ASSERT(matches->IsNull());
      return result;
    }

    // Create an array that looks like the static last_match_info array
    // that is attached to the global RegExp object. We will be returning
    // an array of these.
    int match_length = kFirstCapture + number_of_capture_registers;
    Handle<JSArray> latest_match = Factory::NewJSArray(match_length);

    int match_start;
    int match_end;
    {
      // The raw FixedArray pointer is only used inside this no-allocation
      // scope. Everything needed from it is copied out here, so that nothing
      // below touches a raw heap pointer after a call that may allocate.
      AssertNoAllocation no_allocation;
      FixedArray* match_array = JSArray::cast(*matches)->elements();
      match_array->CopyTo(0,
                          latest_match->elements(),
                          0,
                          match_length);
      match_start = GetCapture(match_array, 0);
      match_end = GetCapture(match_array, 1);
    }

    // SetElement may need to grow |result|, so it must run outside the
    // no-allocation scope above.
    SetElement(result, result_length, latest_match);
    result_length++;

    // Continue after this match; step past a zero-length match so the next
    // attempt does not start at the same index.
    previous_index = match_end;
    if (match_start == previous_index) {
      previous_index++;
    }
  }
}
Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> jsregexp,
int number_of_capture_registers,
Handle<JSArray> last_match_info,
Handle<String> subject,
int previous_index,
int* offsets_vector,
int offsets_vector_length) {
ASSERT(subject->IsFlat());
int* offsets_vector = offsets.vector();
int offsets_vector_length = offsets.length();
bool rc;
// Dispatch to the correct RegExp implementation.
Handle<String> original_subject = subject;
Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data()));
if (UseNativeRegexp()) {
@ -631,6 +468,8 @@ Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> jsregexp,
previous_index);
}
// Handle results from RegExp implementation.
if (!rc) {
return Factory::null_value();
}

View File

@ -94,20 +94,15 @@ class RegExpImpl {
int index,
Handle<JSArray> lastMatchInfo);
static Handle<Object> AtomExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<JSArray> lastMatchInfo);
// Execute an Irregexp bytecode pattern.
// On a successful match, the result is a JSArray containing
// captured positions. On a failure, the result is the null value.
// Returns an empty handle in case of an exception.
static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp,
Handle<String> subject,
int index,
Handle<JSArray> lastMatchInfo);
static Handle<Object> IrregexpExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<JSArray> lastMatchInfo);
// Offsets in the lastMatchInfo array.
static const int kLastCaptureCount = 0;
static const int kLastSubject = 1;
@ -155,17 +150,6 @@ class RegExpImpl {
static bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii);
// On a successful match, the result is a JSArray containing
// captured positions. On a failure, the result is the null value.
// Returns an empty handle in case of an exception.
static Handle<Object> IrregexpExecOnce(Handle<JSRegExp> jsregexp,
int num_captures,
Handle<JSArray> lastMatchInfo,
Handle<String> subject16,
int previous_index,
int* ovector,
int ovector_length);
// Set the subject cache. The previous string buffer is not deleted, so the
// caller should ensure that it doesn't leak.
static void SetSubjectCache(String* subject,

View File

@ -314,7 +314,7 @@ Time::Time() {
// Initialize timestamp from a JavaScript timestamp.
Time::Time(double jstime) {
t() = static_cast<uint64_t>(jstime) * kTimeScaler + kTimeEpoc;
t() = static_cast<int64_t>(jstime) * kTimeScaler + kTimeEpoc;
}

View File

@ -962,6 +962,8 @@ RegExpMacroAssemblerIA32::Result RegExpMacroAssemblerIA32::Match(
int previous_index) {
ASSERT(subject->IsFlat());
ASSERT(previous_index >= 0);
ASSERT(previous_index <= subject->length());
// No allocations before calling the regexp, but we can't use
// AssertNoAllocation, since regexps might be preempted, and another thread

View File

@ -1038,7 +1038,9 @@ static Object* Runtime_RegExpExec(Arguments args) {
CONVERT_CHECKED(Smi, index, args[2]);
CONVERT_CHECKED(JSArray, raw_last_match_info, args[3]);
Handle<JSArray> last_match_info(raw_last_match_info);
CHECK(last_match_info->HasFastElements());
RUNTIME_ASSERT(last_match_info->HasFastElements());
RUNTIME_ASSERT(index->value() >= 0);
RUNTIME_ASSERT(index->value() <= subject->length());
Handle<Object> result = RegExpImpl::Exec(regexp,
subject,
index->value(),
@ -1048,23 +1050,6 @@ static Object* Runtime_RegExpExec(Arguments args) {
}
// Runtime entry point for a global regexp match.
//   args[0]: the JSRegExp to run
//   args[1]: the subject String
//   args[2]: the JSArray holding last-match info (must have fast elements)
// Returns the value produced by RegExpImpl::ExecGlobal, or
// Failure::Exception() when that call yields an empty handle.
static Object* Runtime_RegExpExecGlobal(Arguments args) {
  HandleScope scope;
  ASSERT(args.length() == 3);

  CONVERT_CHECKED(JSRegExp, regexp_arg, args[0]);
  Handle<JSRegExp> regexp(regexp_arg);

  CONVERT_CHECKED(String, subject_arg, args[1]);
  Handle<String> subject(subject_arg);

  CONVERT_CHECKED(JSArray, match_info_arg, args[2]);
  Handle<JSArray> last_match_info(match_info_arg);
  CHECK(last_match_info->HasFastElements());

  Handle<Object> matches =
      RegExpImpl::ExecGlobal(regexp, subject, last_match_info);
  if (!matches.is_null()) return *matches;
  return Failure::Exception();
}
static Object* Runtime_MaterializeRegExpLiteral(Arguments args) {
HandleScope scope;
ASSERT(args.length() == 4);
@ -2337,6 +2322,57 @@ static Object* Runtime_StringSlice(Arguments args) {
}
// Runtime implementation of the global-regexp path of String.prototype.match.
//   args[0]: the subject String
//   args[1]: the JSRegExp to run
//   args[2]: the JSArray that RegExpImpl::Exec uses as last-match info
//
// Runs the regexp repeatedly over the subject, recording the start and end of
// every match, then builds the result from those offsets.
//
// Returns:
//   - the null value if the first attempt does not match,
//   - a JSArray of the matched substrings otherwise,
//   - Failure::Exception() if any attempt yields an empty handle.
static Object* Runtime_StringMatch(Arguments args) {
  ASSERT_EQ(3, args.length());

  CONVERT_ARG_CHECKED(String, subject, 0);
  CONVERT_ARG_CHECKED(JSRegExp, regexp, 1);
  CONVERT_ARG_CHECKED(JSArray, regexp_info, 2);
  HandleScope handles;

  Handle<Object> match = RegExpImpl::Exec(regexp, subject, 0, regexp_info);

  if (match.is_null()) {
    return Failure::Exception();
  }
  if (match->IsNull()) {
    return Heap::null_value();
  }
  int length = subject->length();

  // Match offsets are collected as plain ints first, as (start, end) pairs;
  // the substrings are only created once all matching is done.
  ZoneScope zone_space(DELETE_ON_EXIT);
  ZoneList<int> offsets(8);
  do {
    int start;
    int end;
    {
      // Read the first capture pair out of the match info through a raw
      // pointer; nothing may allocate while that pointer is in use.
      AssertNoAllocation no_alloc;
      FixedArray* elements = regexp_info->elements();
      start = Smi::cast(elements->get(RegExpImpl::kFirstCapture))->value();
      end = Smi::cast(elements->get(RegExpImpl::kFirstCapture + 1))->value();
    }
    offsets.Add(start);
    offsets.Add(end);
    // Resume after the match; after a zero-length match step one further so
    // the next attempt does not start at the same index.
    int index = start < end ? end : end + 1;
    if (index > length) break;
    match = RegExpImpl::Exec(regexp, subject, index, regexp_info);
    if (match.is_null()) {
      return Failure::Exception();
    }
  } while (!match->IsNull());

  int matches = offsets.length() / 2;
  Handle<FixedArray> elements = Factory::NewFixedArray(matches);
  for (int i = 0; i < matches; i++) {
    int from = offsets.at(i * 2);
    int to = offsets.at(i * 2 + 1);
    // Allocate the slice into a handle first. Writing it inline as an
    // argument of elements->set() would leave it unspecified whether
    // |elements| is dereferenced to a raw pointer before or after the
    // allocation, and an allocation may trigger a GC that moves the array.
    Handle<String> slice = Factory::NewStringSlice(subject, from, to);
    elements->set(i, *slice);
  }
  Handle<JSArray> result = Factory::NewJSArrayWithElements(elements);
  result->set_length(Smi::FromInt(matches));
  return *result;
}
static Object* Runtime_NumberToRadixString(Arguments args) {
NoHandleAllocation ha;
ASSERT(args.length() == 2);

View File

@ -138,7 +138,6 @@ namespace v8 { namespace internal {
/* Regular expressions */ \
F(RegExpCompile, 3) \
F(RegExpExec, 4) \
F(RegExpExecGlobal, 3) \
\
/* Strings */ \
F(StringCharCodeAt, 2) \
@ -147,6 +146,7 @@ namespace v8 { namespace internal {
F(StringLocaleCompare, 2) \
F(StringSlice, 3) \
F(StringReplaceRegExpWithString, 4) \
F(StringMatch, 3) \
\
/* Numbers */ \
F(NumberToRadixString, 2) \

View File

@ -157,21 +157,8 @@ function StringMatch(regexp) {
if (!regexp.global) return regexp.exec(subject);
%_Log('regexp', 'regexp-match,%0S,%1r', [subject, regexp]);
var matches = DoRegExpExecGlobal(regexp, subject);
// If the regexp did not match, return null.
if (matches.length == 0) return null;
// Build the result array.
var result = new $Array(match_string);
for (var i = 0; i < matches.length; ++i) {
var matchInfo = matches[i];
var match_string = subject.slice(matchInfo[CAPTURE0],
matchInfo[CAPTURE1]);
result[i] = match_string;
}
return result;
// lastMatchInfo is defined in regexp-delay.js.
return %StringMatch(subject, regexp, lastMatchInfo);
}

View File

@ -30,15 +30,25 @@ function CheckMatch(re, str, matches) {
var result = str.match(re);
if (matches.length > 0) {
assertEquals(matches.length, result.length);
for (idx in matches) {
var lastExpected;
var lastFrom;
var lastLength;
for (var idx = 0; idx < matches.length; idx++) {
var from = matches[idx][0];
var length = matches[idx][1];
var expected = str.substr(from, length);
assertEquals(expected, result[idx]);
var name = str + "[" + from + ".." + (from+length) + "]";
assertEquals(expected, result[idx], name);
if (re.global || idx == 0) {
lastExpected = expected;
lastFrom = from;
lastLength = length;
}
}
assertEquals(expected, RegExp.lastMatch);
assertEquals(str.substr(0, from), RegExp.leftContext);
assertEquals(str.substr(from + length), RegExp.rightContext);
assertEquals(lastExpected, RegExp.lastMatch, "lastMatch");
assertEquals(str.substr(0, lastFrom), RegExp.leftContext, "leftContext");
assertEquals(
str.substr(lastFrom + lastLength), RegExp.rightContext, "rightContext");
} else {
assertTrue(result === null);
}
@ -58,3 +68,10 @@ assertEquals("xxxdefxxxdefxxx", "xxxabcxxxabcxxx".replace(/abc/g, "def"));
assertEquals("o-o-oofo-ofo", "ofooofoooofofooofo".replace(/foo/g, "-"));
assertEquals("deded", "deded".replace(/x/g, "-"));
assertEquals("-a-b-c-d-e-f-", "abcdef".replace(new RegExp("", "g"), "-"));
// Each expected entry is [from, length] into the subject string.
// Non-global regexp: the result holds the whole match followed by its capture.
CheckMatch(/a(.)/, "xyzzyabxyzzzyacxyzzy", [[5, 2], [6, 1]]);
// Global regexp: the result holds every whole match and no captures.
CheckMatch(/a(.)/g, "xyzzyabxyzzyacxyzzy", [[5, 2], [12, 2]]);
// Global regexps with an empty alternative: the expected results include
// zero-length matches, including one at the very end of the subject.
CheckMatch(/a|(?:)/g, "aba", [[0, 1], [1, 0], [2, 1], [3, 0]]);
CheckMatch(/a|(?:)/g, "baba", [[0, 0], [1, 1], [2, 0], [3, 1], [4, 0]]);
CheckMatch(/a|(?:)/g, "bab", [[0, 0], [1, 1], [2, 0], [3, 0]]);