Moved String.prototype.match implementation to C++.
Some extra runtime assertions added. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1608 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
7c35a5d820
commit
bd8816efb0
185
src/jsregexp.cc
185
src/jsregexp.cc
@ -205,25 +205,6 @@ Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
|
||||
}
|
||||
|
||||
|
||||
// Runs a "match all" execution of |regexp| against |subject| by dispatching
// on the regexp's type tag:
//   - JSRegExp::ATOM     -> AtomExecGlobal
//   - JSRegExp::IRREGEXP -> IrregexpExecGlobal
// |last_match_info| is forwarded unchanged to the selected implementation.
// Returns the handle produced by that implementation. Any other type tag
// hits UNREACHABLE() and yields an empty (null) handle.
Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
|
||||
Handle<String> subject,
|
||||
Handle<JSArray> last_match_info) {
|
||||
switch (regexp->TypeTag()) {
|
||||
case JSRegExp::ATOM:
|
||||
return AtomExecGlobal(regexp, subject, last_match_info);
|
||||
case JSRegExp::IRREGEXP: {
|
||||
Handle<Object> result =
|
||||
IrregexpExecGlobal(regexp, subject, last_match_info);
|
||||
// An empty handle is only acceptable when an exception is pending.
|
||||
ASSERT(!result.is_null() || Top::has_pending_exception());
|
||||
return result;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return Handle<Object>::null();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// RegExp Atom implementation: Simple string search using indexOf.
|
||||
|
||||
|
||||
@ -273,55 +254,6 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
|
||||
}
|
||||
|
||||
|
||||
// Finds every occurrence of an ATOM regexp's literal pattern in |subject|.
//
// The pattern string ("needle") is read from the regexp data at
// JSRegExp::kAtomPatternIndex and located with Runtime::StringMatch, starting
// each search where the previous match ended.
//
// Returns a JSArray with one entry per match. Each entry is itself a JSArray
// whose backing store has kFirstCapture + 2 slots, with the capture count set
// to 2 and captures 0/1 set to the match start/end offsets.
//
// When the search finishes and at least one match was found, the final match
// is also written into |last_match_info| via SetAtomLastCapture.
Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
|
||||
Handle<String> subject,
|
||||
Handle<JSArray> last_match_info) {
|
||||
Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
|
||||
ASSERT(last_match_info->HasFastElements());
|
||||
Handle<JSArray> result = Factory::NewJSArray(1);
|
||||
// Position in |subject| at which the next search starts.
|
||||
int index = 0;
|
||||
int match_count = 0;
|
||||
int subject_length = subject->length();
|
||||
int needle_length = needle->length();
|
||||
// Start offset of the most recent match, or -1 if nothing matched yet.
|
||||
int last_value = -1;
|
||||
while (true) {
|
||||
// Per-iteration scope for the handles created in the loop body.
|
||||
HandleScope scope;
|
||||
int value = -1;
|
||||
// Only search while the needle can still fit in the remaining subject.
|
||||
if (index + needle_length <= subject_length) {
|
||||
value = Runtime::StringMatch(subject, needle, index);
|
||||
|
|
||||
}
|
||||
// -1 is treated as "no further match": publish the last match (if any)
// into last_match_info and stop.
if (value == -1) {
|
||||
if (last_value != -1) {
|
||||
Handle<FixedArray> array(last_match_info->elements());
|
||||
SetAtomLastCapture(*array,
|
||||
*subject,
|
||||
last_value,
|
||||
last_value + needle->length());
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
||||
int end = value + needle_length;
|
||||
|
|
||||
// Create an array that looks like the static last_match_info array
|
||||
// that is attached to the global RegExp object. We will be returning
|
||||
// an array of these.
|
||||
Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2);
|
||||
SetLastCaptureCount(*array, 2);
|
||||
// Ignore subject and input fields.
|
||||
SetCapture(*array, 0, value);
|
||||
SetCapture(*array, 1, end);
|
||||
Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
|
||||
SetElement(result, match_count, pair);
|
||||
match_count++;
|
||||
index = end;
|
||||
// An empty needle matches without consuming input; step forward by one so
// the loop makes progress.
if (needle_length == 0) index++;
|
||||
last_value = value;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
// Irregexp implementation.
|
||||
|
||||
|
||||
@ -331,8 +263,7 @@ Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
|
||||
// from the source pattern.
|
||||
// If compilation fails, an exception is thrown and this function
|
||||
// returns false.
|
||||
bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re,
|
||||
bool is_ascii) {
|
||||
bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) {
|
||||
int index;
|
||||
if (is_ascii) {
|
||||
index = JSRegExp::kIrregexpASCIICodeIndex;
|
||||
@ -460,22 +391,20 @@ void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
|
||||
}
|
||||
|
||||
|
||||
Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
|
||||
Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
|
||||
Handle<String> subject,
|
||||
int index,
|
||||
int previous_index,
|
||||
Handle<JSArray> last_match_info) {
|
||||
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
|
||||
ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP);
|
||||
|
||||
// Prepare space for the return values.
|
||||
int number_of_capture_registers =
|
||||
(IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
|
||||
(IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
|
||||
OffsetsVector offsets(number_of_capture_registers);
|
||||
|
||||
int previous_index = index;
|
||||
|
||||
#ifdef DEBUG
|
||||
if (FLAG_trace_regexp_bytecodes) {
|
||||
String* pattern = regexp->Pattern();
|
||||
String* pattern = jsregexp->Pattern();
|
||||
PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
|
||||
PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
|
||||
}
|
||||
@ -487,104 +416,12 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
|
||||
|
||||
last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
|
||||
|
||||
return IrregexpExecOnce(regexp,
|
||||
number_of_capture_registers,
|
||||
last_match_info,
|
||||
subject,
|
||||
previous_index,
|
||||
offsets.vector(),
|
||||
offsets.length());
|
||||
}
|
||||
|
||||
|
||||
// Repeatedly executes an IRREGEXP regexp against |subject|, starting at
// index 0 and continuing after each match, and collects all matches.
//
// Returns:
//   - a JSArray with one entry per match; each entry is a JSArray that
//     receives a copy of the first kFirstCapture + number_of_capture_registers
//     elements of the array returned by IrregexpExecOnce;
//   - an empty handle if IrregexpExecOnce returns an empty handle (the code
//     asserts that an exception is pending in that case).
//
// |subject| is flattened first if it is not already flat, and
// |last_match_info| is grown to hold all capture registers plus
// kLastMatchOverhead before the loop starts.
Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
|
||||
Handle<String> subject,
|
||||
Handle<JSArray> last_match_info) {
|
||||
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
|
||||
|
|
||||
// Prepare space for the return values.
|
||||
// Two registers (start, end) per capture, plus one pair for the whole match.
int number_of_capture_registers =
|
||||
(IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
|
||||
OffsetsVector offsets(number_of_capture_registers);
|
||||
|
|
||||
// Index in |subject| at which the next execution starts.
int previous_index = 0;
|
||||
|
|
||||
Handle<JSArray> result = Factory::NewJSArray(0);
|
||||
int result_length = 0;
|
||||
Handle<Object> matches;
|
||||
|
|
||||
if (!subject->IsFlat()) {
|
||||
FlattenString(subject);
|
||||
}
|
||||
|
|
||||
last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
|
||||
|
|
||||
while (true) {
|
||||
if (previous_index > subject->length() || previous_index < 0) {
|
||||
// Per ECMA-262 15.10.6.2, if the previous index is greater than the
|
||||
// string length, there is no match.
|
||||
return result;
|
||||
} else {
|
||||
#ifdef DEBUG
|
||||
if (FLAG_trace_regexp_bytecodes) {
|
||||
String* pattern = regexp->Pattern();
|
||||
PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
|
||||
PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
|
||||
}
|
||||
#endif
|
||||
// Per-iteration scope for handles created while processing this match.
|
||||
HandleScope scope;
|
||||
matches = IrregexpExecOnce(regexp,
|
||||
number_of_capture_registers,
|
||||
last_match_info,
|
||||
subject,
|
||||
previous_index,
|
||||
offsets.vector(),
|
||||
offsets.length());
|
||||
|
|
||||
// Empty handle: propagate it to the caller; an exception must be pending.
if (matches.is_null()) {
|
||||
ASSERT(Top::has_pending_exception());
|
||||
return matches;
|
||||
}
|
||||
|
|
||||
if (matches->IsJSArray()) {
|
||||
// Create an array that looks like the static last_match_info array
|
||||
// that is attached to the global RegExp object. We will be returning
|
||||
// an array of these.
|
||||
int match_length = kFirstCapture + number_of_capture_registers;
|
||||
Handle<JSArray> latest_match =
|
||||
Factory::NewJSArray(match_length);
|
||||
|
|
||||
// NOTE(review): the raw match_array pointer below is read again after
// SetElement() while this AssertNoAllocation is still in scope. If
// SetElement can allocate (not visible here), that would be a GC hazard —
// confirm.
AssertNoAllocation no_allocation;
|
||||
FixedArray* match_array = JSArray::cast(*matches)->elements();
|
||||
match_array->CopyTo(0,
|
||||
latest_match->elements(),
|
||||
0,
|
||||
match_length);
|
||||
SetElement(result, result_length, latest_match);
|
||||
result_length++;
|
||||
// Continue after the end of this match (capture 1 is the match end).
|
||||
previous_index = GetCapture(match_array, 1);
|
||||
// Empty match (start == end): advance by one so the loop makes progress.
if (GetCapture(match_array, 0) == previous_index) {
|
||||
previous_index++;
|
||||
}
|
||||
} else {
|
||||
// No (further) match: return what has been collected so far.
|
||||
ASSERT(matches->IsNull());
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> jsregexp,
|
||||
int number_of_capture_registers,
|
||||
Handle<JSArray> last_match_info,
|
||||
Handle<String> subject,
|
||||
int previous_index,
|
||||
int* offsets_vector,
|
||||
int offsets_vector_length) {
|
||||
ASSERT(subject->IsFlat());
|
||||
int* offsets_vector = offsets.vector();
|
||||
int offsets_vector_length = offsets.length();
|
||||
bool rc;
|
||||
|
||||
// Dispatch to the correct RegExp implementation.
|
||||
|
||||
Handle<String> original_subject = subject;
|
||||
Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data()));
|
||||
if (UseNativeRegexp()) {
|
||||
@ -631,6 +468,8 @@ Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> jsregexp,
|
||||
previous_index);
|
||||
}
|
||||
|
||||
// Handle results from RegExp implementation.
|
||||
|
||||
if (!rc) {
|
||||
return Factory::null_value();
|
||||
}
|
||||
|
@ -94,20 +94,15 @@ class RegExpImpl {
|
||||
int index,
|
||||
Handle<JSArray> lastMatchInfo);
|
||||
|
||||
static Handle<Object> AtomExecGlobal(Handle<JSRegExp> regexp,
|
||||
Handle<String> subject,
|
||||
Handle<JSArray> lastMatchInfo);
|
||||
|
||||
// Execute an Irregexp bytecode pattern.
|
||||
// On a successful match, the result is a JSArray containing
|
||||
// captured positions. On a failure, the result is the null value.
|
||||
// Returns an empty handle in case of an exception.
|
||||
static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp,
|
||||
Handle<String> subject,
|
||||
int index,
|
||||
Handle<JSArray> lastMatchInfo);
|
||||
|
||||
static Handle<Object> IrregexpExecGlobal(Handle<JSRegExp> regexp,
|
||||
Handle<String> subject,
|
||||
Handle<JSArray> lastMatchInfo);
|
||||
|
||||
// Offsets in the lastMatchInfo array.
|
||||
static const int kLastCaptureCount = 0;
|
||||
static const int kLastSubject = 1;
|
||||
@ -155,17 +150,6 @@ class RegExpImpl {
|
||||
static bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii);
|
||||
|
||||
|
||||
// On a successful match, the result is a JSArray containing
|
||||
// captured positions. On a failure, the result is the null value.
|
||||
// Returns an empty handle in case of an exception.
|
||||
static Handle<Object> IrregexpExecOnce(Handle<JSRegExp> jsregexp,
|
||||
int num_captures,
|
||||
Handle<JSArray> lastMatchInfo,
|
||||
Handle<String> subject16,
|
||||
int previous_index,
|
||||
int* ovector,
|
||||
int ovector_length);
|
||||
|
||||
// Set the subject cache. The previous string buffer is not deleted, so the
|
||||
// caller should ensure that it doesn't leak.
|
||||
static void SetSubjectCache(String* subject,
|
||||
|
@ -314,7 +314,7 @@ Time::Time() {
|
||||
|
||||
// Initialize timestamp from a JavaScript timestamp.
|
||||
Time::Time(double jstime) {
|
||||
t() = static_cast<uint64_t>(jstime) * kTimeScaler + kTimeEpoc;
|
||||
t() = static_cast<int64_t>(jstime) * kTimeScaler + kTimeEpoc;
|
||||
}
|
||||
|
||||
|
||||
|
@ -962,6 +962,8 @@ RegExpMacroAssemblerIA32::Result RegExpMacroAssemblerIA32::Match(
|
||||
int previous_index) {
|
||||
|
||||
ASSERT(subject->IsFlat());
|
||||
ASSERT(previous_index >= 0);
|
||||
ASSERT(previous_index <= subject->length());
|
||||
|
||||
// No allocations before calling the regexp, but we can't use
|
||||
// AssertNoAllocation, since regexps might be preempted, and another thread
|
||||
|
@ -1038,7 +1038,9 @@ static Object* Runtime_RegExpExec(Arguments args) {
|
||||
CONVERT_CHECKED(Smi, index, args[2]);
|
||||
CONVERT_CHECKED(JSArray, raw_last_match_info, args[3]);
|
||||
Handle<JSArray> last_match_info(raw_last_match_info);
|
||||
CHECK(last_match_info->HasFastElements());
|
||||
RUNTIME_ASSERT(last_match_info->HasFastElements());
|
||||
RUNTIME_ASSERT(index->value() >= 0);
|
||||
RUNTIME_ASSERT(index->value() <= subject->length());
|
||||
Handle<Object> result = RegExpImpl::Exec(regexp,
|
||||
subject,
|
||||
index->value(),
|
||||
@ -1048,23 +1050,6 @@ static Object* Runtime_RegExpExec(Arguments args) {
|
||||
}
|
||||
|
||||
|
||||
// Runtime entry point wrapping RegExpImpl::ExecGlobal.
//
// Expects exactly three arguments:
//   args[0]  JSRegExp  the regexp to execute
//   args[1]  String    the subject string
//   args[2]  JSArray   the last-match-info array (must have fast elements)
//
// Returns the object produced by RegExpImpl::ExecGlobal, or
// Failure::Exception() when that call yields an empty handle.
static Object* Runtime_RegExpExecGlobal(Arguments args) {
|
||||
HandleScope scope;
|
||||
ASSERT(args.length() == 3);
|
||||
CONVERT_CHECKED(JSRegExp, raw_regexp, args[0]);
|
||||
Handle<JSRegExp> regexp(raw_regexp);
|
||||
CONVERT_CHECKED(String, raw_subject, args[1]);
|
||||
Handle<String> subject(raw_subject);
|
||||
CONVERT_CHECKED(JSArray, raw_last_match_info, args[2]);
|
||||
Handle<JSArray> last_match_info(raw_last_match_info);
|
||||
CHECK(last_match_info->HasFastElements());
|
||||
Handle<Object> result =
|
||||
RegExpImpl::ExecGlobal(regexp, subject, last_match_info);
|
||||
if (result.is_null()) return Failure::Exception();
|
||||
return *result;
|
||||
}
|
||||
|
||||
|
||||
static Object* Runtime_MaterializeRegExpLiteral(Arguments args) {
|
||||
HandleScope scope;
|
||||
ASSERT(args.length() == 4);
|
||||
@ -2337,6 +2322,57 @@ static Object* Runtime_StringSlice(Arguments args) {
|
||||
}
|
||||
|
||||
|
||||
// Runtime entry point that collects all matches of a regexp in a string.
//
// Expects exactly three arguments:
//   0  String    subject       the string to search
//   1  JSRegExp  regexp        the regexp to execute
//   2  JSArray   regexp_info   match-info array passed to RegExpImpl::Exec;
//                              the capture offsets are read back from it
//
// Returns:
//   - Failure::Exception() if any RegExpImpl::Exec call yields an empty handle;
//   - Heap::null_value() if the first execution (from index 0) does not match;
//   - otherwise a JSArray whose elements are string slices of |subject|, one
//     per match, in match order.
static Object* Runtime_StringMatch(Arguments args) {
|
||||
ASSERT_EQ(3, args.length());
|
||||
|
|
||||
CONVERT_ARG_CHECKED(String, subject, 0);
|
||||
CONVERT_ARG_CHECKED(JSRegExp, regexp, 1);
|
||||
CONVERT_ARG_CHECKED(JSArray, regexp_info, 2);
|
||||
HandleScope handles;
|
||||
|
|
||||
// First attempt always starts at the beginning of the subject.
Handle<Object> match = RegExpImpl::Exec(regexp, subject, 0, regexp_info);
|
||||
|
|
||||
if (match.is_null()) {
|
||||
return Failure::Exception();
|
||||
}
|
||||
if (match->IsNull()) {
|
||||
return Heap::null_value();
|
||||
}
|
||||
int length = subject->length();
|
||||
|
|
||||
// Match offsets are stored as flat (start, end) pairs in a zone-backed list
// until the result array is built.
ZoneScope zone_space(DELETE_ON_EXIT);
|
||||
ZoneList<int> offsets(8);
|
||||
do {
|
||||
int start;
|
||||
int end;
|
||||
{
|
||||
// The raw elements pointer is only used inside this no-allocation scope.
|
||||
AssertNoAllocation no_alloc;
|
||||
FixedArray* elements = regexp_info->elements();
|
||||
start = Smi::cast(elements->get(RegExpImpl::kFirstCapture))->value();
|
||||
end = Smi::cast(elements->get(RegExpImpl::kFirstCapture + 1))->value();
|
||||
}
|
||||
offsets.Add(start);
|
||||
offsets.Add(end);
|
||||
// Resume after the match; after an empty match step one further so the
// loop makes progress.
int index = start < end ? end : end + 1;
|
||||
if (index > length) break;
|
||||
match = RegExpImpl::Exec(regexp, subject, index, regexp_info);
|
||||
if (match.is_null()) {
|
||||
return Failure::Exception();
|
||||
}
|
||||
} while (!match->IsNull());
|
||||
// Two entries (start, end) were recorded per match.
int matches = offsets.length() / 2;
|
||||
Handle<FixedArray> elements = Factory::NewFixedArray(matches);
|
||||
for (int i = 0; i < matches ; i++) {
|
||||
int from = offsets.at(i * 2);
|
||||
int to = offsets.at(i * 2 + 1);
|
||||
elements->set(i, *Factory::NewStringSlice(subject, from, to));
|
||||
}
|
||||
Handle<JSArray> result = Factory::NewJSArrayWithElements(elements);
|
||||
result->set_length(Smi::FromInt(matches));
|
||||
return *result;
|
||||
}
|
||||
|
||||
|
||||
static Object* Runtime_NumberToRadixString(Arguments args) {
|
||||
NoHandleAllocation ha;
|
||||
ASSERT(args.length() == 2);
|
||||
|
@ -138,7 +138,6 @@ namespace v8 { namespace internal {
|
||||
/* Regular expressions */ \
|
||||
F(RegExpCompile, 3) \
|
||||
F(RegExpExec, 4) \
|
||||
F(RegExpExecGlobal, 3) \
|
||||
\
|
||||
/* Strings */ \
|
||||
F(StringCharCodeAt, 2) \
|
||||
@ -147,6 +146,7 @@ namespace v8 { namespace internal {
|
||||
F(StringLocaleCompare, 2) \
|
||||
F(StringSlice, 3) \
|
||||
F(StringReplaceRegExpWithString, 4) \
|
||||
F(StringMatch, 3) \
|
||||
\
|
||||
/* Numbers */ \
|
||||
F(NumberToRadixString, 2) \
|
||||
|
@ -157,21 +157,8 @@ function StringMatch(regexp) {
|
||||
|
||||
if (!regexp.global) return regexp.exec(subject);
|
||||
%_Log('regexp', 'regexp-match,%0S,%1r', [subject, regexp]);
|
||||
var matches = DoRegExpExecGlobal(regexp, subject);
|
||||
|
||||
// If the regexp did not match, return null.
|
||||
if (matches.length == 0) return null;
|
||||
|
||||
// Build the result array.
|
||||
var result = new $Array(match_string);
|
||||
for (var i = 0; i < matches.length; ++i) {
|
||||
var matchInfo = matches[i];
|
||||
var match_string = subject.slice(matchInfo[CAPTURE0],
|
||||
matchInfo[CAPTURE1]);
|
||||
result[i] = match_string;
|
||||
}
|
||||
|
||||
return result;
|
||||
// lastMatchInfo is defined in regexp-delay.js.
|
||||
return %StringMatch(subject, regexp, lastMatchInfo);
|
||||
}
|
||||
|
||||
|
||||
|
@ -30,15 +30,25 @@ function CheckMatch(re, str, matches) {
|
||||
var result = str.match(re);
|
||||
if (matches.length > 0) {
|
||||
assertEquals(matches.length, result.length);
|
||||
for (idx in matches) {
|
||||
var lastExpected;
|
||||
var lastFrom;
|
||||
var lastLength;
|
||||
for (var idx = 0; idx < matches.length; idx++) {
|
||||
var from = matches[idx][0];
|
||||
var length = matches[idx][1];
|
||||
var expected = str.substr(from, length);
|
||||
assertEquals(expected, result[idx]);
|
||||
var name = str + "[" + from + ".." + (from+length) + "]";
|
||||
assertEquals(expected, result[idx], name);
|
||||
if (re.global || idx == 0) {
|
||||
lastExpected = expected;
|
||||
lastFrom = from;
|
||||
lastLength = length;
|
||||
}
|
||||
}
|
||||
assertEquals(expected, RegExp.lastMatch);
|
||||
assertEquals(str.substr(0, from), RegExp.leftContext);
|
||||
assertEquals(str.substr(from + length), RegExp.rightContext);
|
||||
assertEquals(lastExpected, RegExp.lastMatch, "lastMatch");
|
||||
assertEquals(str.substr(0, lastFrom), RegExp.leftContext, "leftContext");
|
||||
assertEquals(
|
||||
str.substr(lastFrom + lastLength), RegExp.rightContext, "rightContext");
|
||||
} else {
|
||||
assertTrue(result === null);
|
||||
}
|
||||
@ -58,3 +68,10 @@ assertEquals("xxxdefxxxdefxxx", "xxxabcxxxabcxxx".replace(/abc/g, "def"));
|
||||
assertEquals("o-o-oofo-ofo", "ofooofoooofofooofo".replace(/foo/g, "-"));
|
||||
assertEquals("deded", "deded".replace(/x/g, "-"));
|
||||
assertEquals("-a-b-c-d-e-f-", "abcdef".replace(new RegExp("", "g"), "-"));
|
||||
|
||||
CheckMatch(/a(.)/, "xyzzyabxyzzzyacxyzzy", [[5, 2], [6, 1]]);
|
||||
CheckMatch(/a(.)/g, "xyzzyabxyzzyacxyzzy", [[5, 2], [12, 2]]);
|
||||
|
||||
CheckMatch(/a|(?:)/g, "aba", [[0, 1], [1, 0], [2, 1], [3, 0]]);
|
||||
CheckMatch(/a|(?:)/g, "baba", [[0, 0], [1, 1], [2, 0], [3, 1], [4, 0]]);
|
||||
CheckMatch(/a|(?:)/g, "bab", [[0, 0], [1, 1], [2, 0], [3, 0]]);
|
Loading…
Reference in New Issue
Block a user