Address comments about my code in http://codereview.chromium.org/12427

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@847 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
erik.corry@gmail.com 2008-11-26 12:18:17 +00:00
parent b8013e590f
commit 1dd110b800
9 changed files with 116 additions and 90 deletions

View File

@ -25,7 +25,7 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// A light-weight assembler for the Regexp2000 byte code.
// A light-weight assembler for the Irregexp byte code.
#include "v8.h"
@ -67,16 +67,16 @@ void IrregexpAssembler::Emit32(uint32_t word) {
void IrregexpAssembler::EmitOrLink(Label* l) {
if (l->is_bound()) {
Emit32(l->pos());
} else {
int pos = 0;
if (l->is_linked()) {
pos = l->pos();
}
l->link_to(pc_);
Emit32(pos);
if (l->is_bound()) {
Emit32(l->pos());
} else {
int pos = 0;
if (l->is_linked()) {
pos = l->pos();
}
l->link_to(pc_);
Emit32(pos);
}
}
} } // namespace v8::internal

View File

@ -40,9 +40,9 @@ namespace v8 { namespace internal {
IrregexpAssembler::IrregexpAssembler(Vector<byte> buffer)
: buffer_(buffer),
pc_(0),
own_buffer_(false) {
: buffer_(buffer),
pc_(0),
own_buffer_(false) {
}
@ -232,7 +232,7 @@ void IrregexpAssembler::CheckCharacterGT(uc16 limit, Label* on_greater) {
void IrregexpAssembler::CheckNotBackReference(int capture_index,
Label* on_mismatch) {
Label* on_mismatch) {
Emit(BC_CHECK_NOT_BACK_REF);
Emit(capture_index);
EmitOrLink(on_mismatch);

View File

@ -1,4 +1,29 @@
// Copyright 2006-2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// A light-weight assembler for the Irregexp byte code.
@ -47,9 +72,11 @@ class IrregexpAssembler {
void Fail();
void Succeed();
void Break(); // This instruction will cause a fatal VM error if hit.
// This instruction will cause a fatal VM error if hit.
void Break();
void Bind(Label* l); // Binds an unbound label L to the current code posn.
// Binds an unbound label L to the current code posn.
void Bind(Label* l);
void AdvanceCP(int by);
@ -69,11 +96,11 @@ class IrregexpAssembler {
void CheckCharacterLT(uc16 limit, Label* on_less);
void CheckCharacterGT(uc16 limit, Label* on_greater);
// Checks current position for a match against a
// previous capture. Advances current position by the length of the capture
// iff it matches. The capture is stored in a given register and the
// the register after. If a register contains -1 then the other register
// must always contain -1 and the on_mismatch label will never be called.
// Checks current position for a match against a previous capture. Advances
// current position by the length of the capture iff it matches. The capture
// is stored in a given register and the register after. If a register
// contains -1 then the other register must always contain -1 and the
// on_mismatch label will never be called.
void CheckNotBackReference(int capture_index, Label* on_mismatch);
void CheckNotBackReferenceNoCase(int capture_index, Label* on_mismatch);
@ -82,7 +109,7 @@ class IrregexpAssembler {
void CheckRegisterGE(int reg_index, uint16_t vs, Label* on_greater_equal);
// Subtracts a 16 bit value from the current character, uses the result to
// look up in a bit array, uses the result of that decide whether to fall
// look up in a bit array, uses the result of that to decide whether to fall
// though (on 1) or jump to the on_zero label (on 0).
void LookupMap1(uc16 start, Label* bit_map, Label* on_zero);
@ -114,22 +141,20 @@ class IrregexpAssembler {
inline void EmitOrLink(Label* l);
private:
// Don't use this.
IrregexpAssembler() { UNREACHABLE(); }
// The buffer into which code and relocation info are generated.
Vector<byte> buffer_;
inline void CheckRegister(int byte_code,
int reg_index,
uint16_t vs,
Label* on_true);
// Code generation.
int pc_; // The program counter; moves forward.
void Expand();
// The buffer into which code and relocation info are generated.
Vector<byte> buffer_;
// The program counter.
int pc_;
// True if the assembler owns the buffer, false if buffer is external.
bool own_buffer_;
void Expand();
DISALLOW_IMPLICIT_CONSTRUCTORS(IrregexpAssembler);
};

View File

@ -31,7 +31,7 @@
namespace v8 { namespace internal {
#define BYTECODE_ITERATOR(V) \
#define BYTECODE_ITERATOR(V) \
V(BREAK, 0, 1) /* break */ \
V(PUSH_CP, 1, 5) /* push_cp offset32 */ \
V(PUSH_BT, 2, 5) /* push_bt addr32 */ \
@ -63,7 +63,7 @@ V(LOOKUP_MAP2, 27, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
V(LOOKUP_MAP8, 28, 99) /* l_map8 start16 byte_map addr32* */ \
V(LOOKUP_HI_MAP8, 29, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_LT, 30, 8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_REGISTER_GE, 31, 8) /* check_reg_ge register_index value16 addr32 */ \
V(CHECK_REGISTER_GE, 31, 8) /* check_reg_ge register_index value16 addr32 */
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;

View File

@ -202,7 +202,7 @@ DEFINE_bool(preemption, false,
// irregexp
DEFINE_bool(irregexp, false, "new regular expression code")
DEFINE_bool(trace_regexps, false, "trace Irregexp execution")
DEFINE_bool(trace_regexp_bytecodes, false, "trace Irregexp bytecode executon")
DEFINE_bool(trace_regexp_bytecodes, false, "trace Irregexp bytecode execution")
DEFINE_bool(attempt_case_independent, false, "attempt to run Irregexp case independent")
DEFINE_bool(irregexp_native, false, "use native code Irregexp implementation (IA32 only)")

View File

@ -69,10 +69,10 @@ static void TraceInterpreter(const byte* code_base,
const char* bytecode_name) {
if (FLAG_trace_regexp_bytecodes) {
PrintF("pc = %02x, sp = %d, current = %d, bc = %s",
pc - code_base,
stack_depth,
current_position,
bytecode_name);
pc - code_base,
stack_depth,
current_position,
bytecode_name);
for (int i = 1; i < bytecode_length; i++) {
printf(", %02x", pc[i]);
}
@ -81,15 +81,17 @@ static void TraceInterpreter(const byte* code_base,
}
# define BYTECODE(name) case BC_##name: \
TraceInterpreter(code_base, \
pc, \
backtrack_sp - backtrack_stack, \
current, \
BC_##name##_LENGTH, \
#name);
#define BYTECODE(name) \
case BC_##name: \
TraceInterpreter(code_base, \
pc, \
backtrack_sp - backtrack_stack, \
current, \
BC_##name##_LENGTH, \
#name);
#else
# define BYTECODE(name) case BC_##name: // NOLINT
#define BYTECODE(name) \
case BC_##name:
#endif
@ -371,7 +373,6 @@ bool IrregexpInterpreter::Match(Handle<ByteArray> code_array,
ASSERT(StringShape(*subject16).IsTwoByteRepresentation());
ASSERT(subject16->IsFlat(StringShape(*subject16)));
AssertNoAllocation a;
const byte* code_base = code_array->GetDataStartAddress();
return RawMatch(code_base,

View File

@ -25,7 +25,7 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// A simple interpreter for the Regexp2000 byte code.
// A simple interpreter for the Irregexp byte code.
#ifndef V8_INTERPRETER_IRREGEXP_H_
#define V8_INTERPRETER_IRREGEXP_H_

View File

@ -479,18 +479,18 @@ Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,
ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation());
ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject)));
bool rc;
{
for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
offsets_vector[i] = -1;
}
LOG(RegExpExecEvent(regexp, previous_index, two_byte_subject));
for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
offsets_vector[i] = -1;
}
FixedArray* irregexp =
FixedArray::cast(regexp->DataAt(JSRegExp::kIrregexpDataIndex));
int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
LOG(RegExpExecEvent(regexp, previous_index, two_byte_subject));
switch (tag) {
FixedArray* irregexp =
FixedArray::cast(regexp->DataAt(JSRegExp::kIrregexpDataIndex));
int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
switch (tag) {
case RegExpMacroAssembler::kIA32Implementation: {
Code* code = Code::cast(irregexp->get(kIrregexpCodeIndex));
SmartPointer<int> captures(NewArray<int>((num_captures + 1) * 2));
@ -530,7 +530,6 @@ Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,
UNREACHABLE();
rc = false;
break;
}
}
if (!rc) {
@ -655,13 +654,12 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
Handle<String> subject16 = CachedStringToTwoByte(subject);
Handle<Object> result(
IrregexpExecOnce(regexp,
num_captures,
subject16,
previous_index,
offsets.vector(),
offsets.length()));
Handle<Object> result(IrregexpExecOnce(regexp,
num_captures,
subject16,
previous_index,
offsets.vector(),
offsets.length()));
return result;
}
@ -738,9 +736,11 @@ Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
} while (matches->IsJSArray());
// If we exited the loop with an exception, throw it.
if (matches->IsNull()) { // Exited loop normally.
if (matches->IsNull()) {
// Exited loop normally.
return result;
} else { // Exited loop with the exception in matches.
} else {
// Exited loop with the exception in matches.
return matches;
}
}
@ -794,9 +794,11 @@ Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
} while (matches->IsJSArray());
// If we exited the loop with an exception, throw it.
if (matches->IsNull()) { // Exited loop normally.
if (matches->IsNull()) {
// Exited loop normally.
return result;
} else { // Exited loop with the exception in matches.
} else {
// Exited loop with the exception in matches.
return matches;
}
}
@ -804,7 +806,7 @@ Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) {
FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
return Smi::cast(value->get(kJscreNumberOfCapturesIndex))-> value();
return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->value();
}
@ -836,7 +838,7 @@ Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) {
// -------------------------------------------------------------------
// New regular expression engine
// Implmentation of the Irregexp regular expression engine.
void RegExpTree::AppendToText(RegExpText* text) {
@ -1001,10 +1003,10 @@ bool EndNode::GoTo(RegExpCompiler* compiler) {
switch (action_) {
case ACCEPT:
compiler->macro_assembler()->Succeed();
break;
break;
case BACKTRACK:
compiler->macro_assembler()->Backtrack();
break;
break;
}
return true;
}
@ -1150,19 +1152,18 @@ static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
ASSERT(c2 > c1);
macro_assembler->CheckNotCharacterAfterOr(c2, exor, on_failure);
return true;
} else {
ASSERT(c2 > c1);
uc16 diff = c2 - c1;
if (((diff - 1) & diff) == 0 && c1 >= diff) {
// If the characters differ by 2^n but don't differ by one bit then
// subtract the difference from the found character, then do the or
// trick. We avoid the theoretical case where negative numbers are
// involved in order to simplify code generation.
macro_assembler->CheckNotCharacterAfterMinusOr(c2 - diff,
diff,
on_failure);
return true;
}
}
ASSERT(c2 > c1);
uc16 diff = c2 - c1;
if (((diff - 1) & diff) == 0 && c1 >= diff) {
// If the characters differ by 2^n but don't differ by one bit then
// subtract the difference from the found character, then do the or
// trick. We avoid the theoretical case where negative numbers are
// involved in order to simplify code generation.
macro_assembler->CheckNotCharacterAfterMinusOr(c2 - diff,
diff,
on_failure);
return true;
}
return false;
}
@ -1224,7 +1225,7 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
Label success;
Label *char_is_in_class =
Label* char_is_in_class =
cc->is_negated() ? on_failure : &success;
int range_count = ranges->length();
@ -1361,8 +1362,7 @@ bool ChoiceNode::Emit(RegExpCompiler* compiler) {
Bind(macro_assembler);
// For now we just call all choices one after the other. The idea ultimately
// is to use the Dispatch table to try only the relevant ones.
int i;
for (i = 0; i < choice_count - 1; i++) {
for (int i = 0; i < choice_count - 1; i++) {
GuardedAlternative alternative = alternatives_->at(i);
Label after;
Label after_no_pop_cp;
@ -1384,7 +1384,7 @@ bool ChoiceNode::Emit(RegExpCompiler* compiler) {
macro_assembler->PopCurrentPosition();
macro_assembler->Bind(&after_no_pop_cp);
}
GuardedAlternative alternative = alternatives_->at(i);
GuardedAlternative alternative = alternatives_->at(choice_count - 1);
ZoneList<Guard*>* guards = alternative.guards();
if (guards != NULL) {
int guard_count = guards->length();

View File

@ -4296,7 +4296,7 @@ ScriptDataImpl* PreParse(unibrow::CharacterStream* stream,
bool ParseRegExp(FlatStringReader* input, RegExpParseResult* result) {
ASSERT(result != NULL);
// Get multiline flag somehow
// TODO(plesner): Get multiline flag somehow
RegExpParser parser(input, &result->error, false);
bool ok = true;
result->tree = parser.ParsePattern(&ok);