Merge regexp2000 back into bleeding_edge

Review URL: http://codereview.chromium.org/12427

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@832 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
christian.plesner.hansen@gmail.com 2008-11-25 11:07:48 +00:00
parent 112e9ebbe5
commit b57b4a15cd
53 changed files with 8756 additions and 468 deletions

View File

@ -35,15 +35,17 @@ Import('context')
SOURCES = {
'all': [
'accessors.cc', 'allocation.cc', 'api.cc', 'assembler.cc', 'ast.cc',
'bootstrapper.cc', 'builtins.cc', 'checks.cc', 'code-stubs.cc',
'codegen.cc', 'compilation-cache.cc', 'compiler.cc', 'contexts.cc',
'conversions.cc', 'counters.cc', 'dateparser.cc', 'debug.cc',
'disassembler.cc', 'execution.cc', 'factory.cc', 'flags.cc', 'frames.cc',
'global-handles.cc', 'handles.cc', 'hashmap.cc', 'heap.cc', 'ic.cc',
'jsregexp.cc', 'log.cc', 'mark-compact.cc', 'messages.cc', 'objects.cc',
'parser.cc', 'property.cc', 'rewriter.cc', 'runtime.cc', 'scanner.cc',
'scopeinfo.cc', 'scopes.cc', 'serialize.cc', 'snapshot-common.cc',
'accessors.cc', 'allocation.cc', 'api.cc', 'assembler.cc',
'assembler-irregexp.cc', 'ast.cc', 'bootstrapper.cc', 'builtins.cc',
'checks.cc', 'code-stubs.cc', 'codegen.cc', 'compilation-cache.cc',
'compiler.cc', 'contexts.cc', 'conversions.cc', 'counters.cc',
'dateparser.cc', 'debug.cc', 'disassembler.cc', 'execution.cc',
'factory.cc', 'flags.cc', 'frames.cc', 'global-handles.cc',
'handles.cc', 'hashmap.cc', 'heap.cc', 'ic.cc', 'interpreter-irregexp.cc',
'jsregexp.cc', 'log.cc', 'mark-compact.cc', 'messages.cc',
'objects.cc', 'parser.cc', 'property.cc', 'regexp-macro-assembler.cc',
'regexp-macro-assembler-irregexp.cc', 'rewriter.cc', 'runtime.cc', 'scanner.cc',
'scopeinfo.cc', 'scopes.cc', 'serialize.cc', 'snapshot-common.cc',
'spaces.cc', 'string-stream.cc', 'stub-cache.cc', 'token.cc', 'top.cc',
'unicode.cc', 'usage-analyzer.cc', 'utils.cc', 'v8-counters.cc',
'v8.cc', 'v8threads.cc', 'variables.cc', 'zone.cc'
@ -53,7 +55,8 @@ SOURCES = {
'macro-assembler-arm.cc', 'stub-cache-arm.cc'],
'arch:ia32': ['assembler-ia32.cc', 'builtins-ia32.cc', 'codegen-ia32.cc',
'cpu-ia32.cc', 'disasm-ia32.cc', 'frames-ia32.cc', 'ic-ia32.cc',
'macro-assembler-ia32.cc', 'stub-cache-ia32.cc'],
'macro-assembler-ia32.cc', 'regexp-macro-assembler-ia32.cc',
'stub-cache-ia32.cc'],
'simulator:arm': ['simulator-arm.cc'],
'os:freebsd': ['platform-freebsd.cc'],
'os:linux': ['platform-linux.cc'],

View File

@ -205,6 +205,14 @@ void Assembler::emit(const Immediate& x) {
}
// Writes the low 16 bits of the immediate |x| at pc_ and advances pc_ past
// them. Only immediates without relocation information are accepted.
void Assembler::emit_w(const Immediate& x) {
  ASSERT(x.rmode_ == RelocInfo::NONE);
  uint16_t* const slot = reinterpret_cast<uint16_t*>(pc_);
  *slot = static_cast<uint16_t>(x.x_);
  pc_ += sizeof(*slot);
}
// Decodes the 32-bit displacement stored at |pc| and returns the address it
// designates; the displacement is relative to the end of its own 4-byte field.
Address Assembler::target_address_at(Address pc) {
  const int32_t displacement = *reinterpret_cast<int32_t*>(pc);
  Address end_of_field = pc + sizeof(int32_t);
  return end_of_field + displacement;
}

View File

@ -122,7 +122,8 @@ void CpuFeatures::Probe() {
#undef __
CodeDesc desc;
assm.GetCode(&desc);
Object* code = Heap::CreateCode(desc, NULL, Code::ComputeFlags(Code::STUB));
Object* code =
Heap::CreateCode(desc, NULL, Code::ComputeFlags(Code::STUB), NULL);
if (!code->IsCode()) return;
F0 f = FUNCTION_CAST<F0>(Code::cast(code)->entry());
uint32_t res = f();
@ -294,7 +295,6 @@ Assembler::Assembler(void* buffer, int buffer_size) {
}
buffer_size_ = buffer_size;
own_buffer_ = true;
} else {
// use externally provided buffer instead
ASSERT(buffer_size > 0);
@ -420,6 +420,29 @@ void Assembler::push(const Operand& src) {
}
// Pushes the position of |label| within the Code object, i.e. the label
// position plus Code::kHeaderSize.
//
// For a bound label the value is emitted directly. For an unbound label a
// CODE_RELATIVE displacement is emitted after the 0x68 opcode so that the
// value can be patched in when the label is bound.
//
// Only reloc_mode == RelocInfo::NONE is supported.
void Assembler::push(Label* label, RelocInfo::Mode reloc_mode) {
  ASSERT_NOT_NULL(label);
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
  // If reloc_mode == NONE, the label is stored as buffer relative.
  ASSERT(reloc_mode == RelocInfo::NONE);
  if (label->is_bound()) {
    // Index of position in Code object:
    int pos = label->pos() + Code::kHeaderSize;
    // PUSH imm8 (0x6A) sign-extends its operand to 32 bits, so the short
    // form may only be used for values that fit in a signed byte. Values in
    // 128..255 would otherwise be pushed as negative numbers.
    if (pos >= 0 && pos < 128) {
      EMIT(0x6a);
      EMIT(pos);
    } else {
      EMIT(0x68);
      emit(pos);
    }
  } else {
    EMIT(0x68);
    emit_disp(label, Displacement::CODE_RELATIVE);
  }
}
void Assembler::pop(Register dst) {
ASSERT(reloc_info_writer.last_pc() != NULL);
if (FLAG_push_pop_elimination && (reloc_info_writer.last_pc() <= last_pc_)) {
@ -546,6 +569,22 @@ void Assembler::pop(const Operand& dst) {
}
// Emits the ENTER instruction: opcode 0xC8, then the 16-bit immediate |size|
// (written by emit_w), then a zero byte for the instruction's second, 8-bit
// operand.
void Assembler::enter(const Immediate& size) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
EMIT(0xC8);
emit_w(size);
EMIT(0);
}
// Emits the single-byte LEAVE instruction (opcode 0xC9).
void Assembler::leave() {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
EMIT(0xC9);
}
void Assembler::mov_b(Register dst, const Operand& src) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
@ -830,6 +869,23 @@ void Assembler::cmp(const Operand& op, const Immediate& imm) {
}
// Emits a three-byte sequence: CLD, then the REP prefix, then CMPSB (byte
// string compare). The CLD is emitted so the compare runs forwards.
void Assembler::rep_cmpsb() {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
EMIT(0xFC); // CLD to ensure forward operation
EMIT(0xF3); // REP
EMIT(0xA6); // CMPSB
}
// Emits CLD followed by a REP-prefixed CMPSW (16-bit string compare).
//
// In 32-bit code opcode 0xA7 on its own compares 32-bit elements (CMPSD).
// The 0x66 operand-size prefix is required to make it compare 16-bit words.
void Assembler::rep_cmpsw() {
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
  EMIT(0xFC);  // CLD to ensure forward operation
  EMIT(0xF3);  // REP
  EMIT(0x66);  // Operand-size override: 16-bit operands
  EMIT(0xA7);  // CMPSW
}
void Assembler::dec_b(Register dst) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
@ -1074,6 +1130,14 @@ void Assembler::shr(Register dst) {
}
// Emits SHR dst, CL: a logical right shift of |dst| by the count in CL.
//
// The shift-by-CL form is opcode 0xD3 with /5 in the ModR/M byte. Opcode
// 0xD1 /5 is the "shift right by 1" form and ignores CL.
void Assembler::shr_cl(Register dst) {
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
  EMIT(0xD3);
  EMIT(0xE8 | dst.code());
}
void Assembler::sub(const Operand& dst, const Immediate& x) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
@ -1171,6 +1235,15 @@ void Assembler::xor_(const Operand& dst, const Immediate& x) {
}
// Emits the BT (bit test) instruction: the two opcode bytes 0x0F 0xA3
// followed by the operand encoding produced by emit_operand(src, dst).
void Assembler::bt(const Operand& dst, Register src) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
EMIT(0x0F);
EMIT(0xA3);
emit_operand(src, dst);
}
void Assembler::bts(const Operand& dst, Register src) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
@ -1224,13 +1297,6 @@ void Assembler::ret(int imm16) {
}
void Assembler::leave() {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
EMIT(0xC9);
}
// Labels refer to positions in the (to be) generated code.
// There are bound, linked, and unused labels.
//
@ -1270,12 +1336,16 @@ void Assembler::bind_to(Label* L, int pos) {
while (L->is_linked()) {
Displacement disp = disp_at(L);
int fixup_pos = L->pos();
if (disp.type() == Displacement::UNCONDITIONAL_JUMP) {
ASSERT(byte_at(fixup_pos - 1) == 0xE9); // jmp expected
if (disp.type() == Displacement::CODE_RELATIVE) {
long_at_put(fixup_pos, pos + Code::kHeaderSize);
} else {
if (disp.type() == Displacement::UNCONDITIONAL_JUMP) {
ASSERT(byte_at(fixup_pos - 1) == 0xE9); // jmp expected
}
// relative address, relative to point after address
int imm32 = pos - (fixup_pos + sizeof(int32_t));
long_at_put(fixup_pos, imm32);
}
// relative address, relative to point after address
int imm32 = pos - (fixup_pos + sizeof(int32_t));
long_at_put(fixup_pos, imm32);
disp.next(L);
}
L->bind_to(pos);

View File

@ -118,8 +118,8 @@ enum Condition {
not_equal = 5,
below_equal = 6,
above = 7,
sign = 8,
not_sign = 9,
negative = 8,
positive = 9,
parity_even = 10,
parity_odd = 11,
less = 12,
@ -128,10 +128,12 @@ enum Condition {
greater = 15,
// aliases
carry = below,
not_carry = above_equal,
zero = equal,
not_zero = not_equal,
negative = sign,
positive = not_sign
sign = negative,
not_sign = positive
};
@ -283,13 +285,14 @@ class Operand BASE_EMBEDDED {
//
// Displacement _data field layout
//
// |31.....1| ......0|
// |31.....2|1......0|
// [ next | type |
class Displacement BASE_EMBEDDED {
public:
enum Type {
UNCONDITIONAL_JUMP,
CODE_RELATIVE,
OTHER
};
@ -313,8 +316,8 @@ class Displacement BASE_EMBEDDED {
private:
int data_;
class TypeField: public BitField<Type, 0, 1> {};
class NextField: public BitField<int, 1, 32-1> {};
class TypeField: public BitField<Type, 0, 2> {};
class NextField: public BitField<int, 2, 32-2> {};
void init(Label* L, Type type);
};
@ -440,10 +443,14 @@ class Assembler : public Malloced {
void push(const Immediate& x);
void push(Register src);
void push(const Operand& src);
void push(Label* label, RelocInfo::Mode relocation_mode);
void pop(Register dst);
void pop(const Operand& dst);
void enter(const Immediate& size);
void leave();
// Moves
void mov_b(Register dst, const Operand& src);
void mov_b(const Operand& dst, int8_t imm8);
@ -491,6 +498,9 @@ class Assembler : public Malloced {
void cmp(Register reg, const Operand& op);
void cmp(const Operand& op, const Immediate& imm);
void rep_cmpsb();
void rep_cmpsw();
void dec_b(Register dst);
void dec(Register dst);
@ -535,6 +545,7 @@ class Assembler : public Malloced {
void shr(Register dst, uint8_t imm8);
void shr(Register dst);
void shr_cl(Register dst);
void sub(const Operand& dst, const Immediate& x);
void sub(Register dst, const Operand& src);
@ -550,6 +561,7 @@ class Assembler : public Malloced {
void xor_(const Operand& dst, const Immediate& x);
// Bit operations.
void bt(const Operand& dst, Register src);
void bts(const Operand& dst, Register src);
// Miscellaneous
@ -558,7 +570,6 @@ class Assembler : public Malloced {
void nop();
void rdtsc();
void ret(int imm16);
void leave();
// Label operations & relative jumps (PPUM Appendix D)
//
@ -748,6 +759,7 @@ class Assembler : public Malloced {
inline void emit(Handle<Object> handle);
inline void emit(uint32_t x, RelocInfo::Mode rmode);
inline void emit(const Immediate& x);
inline void emit_w(const Immediate& x);
// instruction generation
void emit_arith_b(int op1, int op2, Register dst, int imm8);

View File

@ -0,0 +1,82 @@
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// A light-weight assembler for the Irregexp byte code.
#include "v8.h"
#include "ast.h"
#include "bytecodes-irregexp.h"
#include "assembler-irregexp.h"
namespace v8 { namespace internal {
// Appends one element to the byte buffer, growing the buffer first when it
// is completely full. The value is stored into a byte-sized slot.
void IrregexpAssembler::Emit(uint32_t byte) {
  ASSERT(pc_ <= buffer_.length());
  const bool buffer_is_full = (pc_ == buffer_.length());
  if (buffer_is_full) Expand();
  buffer_[pc_] = byte;
  pc_++;
}
void IrregexpAssembler::Emit16(uint32_t word) {
ASSERT(pc_ <= buffer_.length());
if (pc_ + 1 >= buffer_.length()) {
Expand();
}
Store16(buffer_.start() + pc_, word);
pc_ += 2;
}
void IrregexpAssembler::Emit32(uint32_t word) {
ASSERT(pc_ <= buffer_.length());
if (pc_ + 3 >= buffer_.length()) {
Expand();
}
Store32(buffer_.start() + pc_, word);
pc_ += 4;
}
// Emits a 32-bit reference to |l|.
//
// A bound label is emitted as its position. An unbound label is threaded
// into a chain instead: the slot receives the label's previous link (0 when
// there is none) and the label is re-linked to this slot. Bind() walks this
// chain later to patch in the final position.
void IrregexpAssembler::EmitOrLink(Label* l) {
  if (l->is_bound()) {
    Emit32(l->pos());
    return;
  }
  const int previous_link = l->is_linked() ? l->pos() : 0;
  l->link_to(pc_);
  Emit32(previous_link);
}
} } // namespace v8::internal

339
src/assembler-irregexp.cc Normal file
View File

@ -0,0 +1,339 @@
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// A light-weight assembler for the Irregexp byte code.
#include "v8.h"
#include "ast.h"
#include "bytecodes-irregexp.h"
#include "assembler-irregexp.h"
#include "assembler-irregexp-inl.h"
namespace v8 { namespace internal {
// Starts emitting at offset 0 of the caller-provided buffer. The buffer is
// recorded as not owned, so the destructor will not dispose of it.
IrregexpAssembler::IrregexpAssembler(Vector<byte> buffer)
: buffer_(buffer),
pc_(0),
own_buffer_(false) {
}
// Disposes of the buffer only when the assembler owns it, i.e. when it was
// allocated by Expand().
IrregexpAssembler::~IrregexpAssembler() {
if (own_buffer_) {
buffer_.Dispose();
}
}
// Emits BC_PUSH_CP followed by the 32-bit |cp_offset|, which must be
// non-negative.
void IrregexpAssembler::PushCurrentPosition(int cp_offset) {
ASSERT(cp_offset >= 0);
Emit(BC_PUSH_CP);
Emit32(cp_offset);
}
// Emits BC_PUSH_BT followed by a 32-bit reference to label |l|.
void IrregexpAssembler::PushBacktrack(Label* l) {
Emit(BC_PUSH_BT);
EmitOrLink(l);
}
// Emits BC_PUSH_REGISTER followed by the register index as a single byte.
void IrregexpAssembler::PushRegister(int index) {
ASSERT(index >= 0);
Emit(BC_PUSH_REGISTER);
Emit(index);
}
// Emits BC_SET_REGISTER_TO_CP, the register index as a single byte, and the
// 32-bit |cp_offset|. Both arguments must be non-negative.
void IrregexpAssembler::WriteCurrentPositionToRegister(int index,
int cp_offset) {
ASSERT(cp_offset >= 0);
ASSERT(index >= 0);
Emit(BC_SET_REGISTER_TO_CP);
Emit(index);
Emit32(cp_offset);
}
// Emits BC_SET_CP_TO_REGISTER followed by the register index as a single
// byte.
void IrregexpAssembler::ReadCurrentPositionFromRegister(int index) {
ASSERT(index >= 0);
Emit(BC_SET_CP_TO_REGISTER);
Emit(index);
}
// Emits BC_SET_REGISTER_TO_SP followed by the register index as a single
// byte.
void IrregexpAssembler::WriteStackPointerToRegister(int index) {
ASSERT(index >= 0);
Emit(BC_SET_REGISTER_TO_SP);
Emit(index);
}
// Emits BC_SET_SP_TO_REGISTER followed by the register index as a single
// byte.
void IrregexpAssembler::ReadStackPointerFromRegister(int index) {
ASSERT(index >= 0);
Emit(BC_SET_SP_TO_REGISTER);
Emit(index);
}
// Emits BC_SET_REGISTER, the register index as a single byte, and the 32-bit
// |value|.
void IrregexpAssembler::SetRegister(int index, int value) {
ASSERT(index >= 0);
Emit(BC_SET_REGISTER);
Emit(index);
Emit32(value);
}
// Emits BC_ADVANCE_REGISTER, the register index as a single byte, and the
// 32-bit amount |by|.
void IrregexpAssembler::AdvanceRegister(int index, int by) {
ASSERT(index >= 0);
Emit(BC_ADVANCE_REGISTER);
Emit(index);
Emit32(by);
}
// Emits the operand-less BC_POP_CP bytecode.
void IrregexpAssembler::PopCurrentPosition() {
Emit(BC_POP_CP);
}
// Emits the operand-less BC_POP_BT bytecode.
void IrregexpAssembler::PopBacktrack() {
Emit(BC_POP_BT);
}
void IrregexpAssembler::PopRegister(int index) {
Emit(BC_POP_REGISTER);
Emit(index);
}
// Emits the operand-less BC_FAIL bytecode.
void IrregexpAssembler::Fail() {
Emit(BC_FAIL);
}
// Emits the operand-less BC_BREAK bytecode.
void IrregexpAssembler::Break() {
Emit(BC_BREAK);
}
// Emits the operand-less BC_SUCCEED bytecode.
void IrregexpAssembler::Succeed() {
Emit(BC_SUCCEED);
}
// Binds the unbound label |l| to the current position.
//
// If the label has been referenced already, its pending 32-bit slots form a
// chain: each slot holds the offset of the next one and 0 ends the chain.
// Every slot in the chain is overwritten with the current position.
void IrregexpAssembler::Bind(Label* l) {
  ASSERT(!l->is_bound());
  if (l->is_linked()) {
    int fixup = l->pos();
    while (fixup != 0) {
      byte* slot = buffer_.start() + fixup;
      int next_fixup = Load32(slot);
      Store32(slot, pc_);
      fixup = next_fixup;
    }
  }
  l->bind_to(pc_);
}
// Emits BC_ADVANCE_CP followed by the 32-bit |cp_offset|.
void IrregexpAssembler::AdvanceCP(int cp_offset) {
Emit(BC_ADVANCE_CP);
Emit32(cp_offset);
}
// Emits BC_GOTO followed by a 32-bit reference to label |l|.
void IrregexpAssembler::GoTo(Label* l) {
Emit(BC_GOTO);
EmitOrLink(l);
}
// Emits BC_LOAD_CURRENT_CHAR, the 32-bit |cp_offset|, and a 32-bit reference
// to the |on_end| label.
void IrregexpAssembler::LoadCurrentChar(int cp_offset, Label* on_end) {
Emit(BC_LOAD_CURRENT_CHAR);
Emit32(cp_offset);
EmitOrLink(on_end);
}
// Emits BC_CHECK_CHAR, the 16-bit character |c|, and a 32-bit reference to
// the |on_match| label.
void IrregexpAssembler::CheckCharacter(uc16 c, Label* on_match) {
Emit(BC_CHECK_CHAR);
Emit16(c);
EmitOrLink(on_match);
}
// Emits BC_CHECK_NOT_CHAR, the 16-bit character |c|, and a 32-bit reference
// to the |on_mismatch| label.
void IrregexpAssembler::CheckNotCharacter(uc16 c, Label* on_mismatch) {
Emit(BC_CHECK_NOT_CHAR);
Emit16(c);
EmitOrLink(on_mismatch);
}
// Emits BC_OR_CHECK_NOT_CHAR, the 16-bit character |c|, the 16-bit |mask|,
// and a 32-bit reference to the |on_mismatch| label.
void IrregexpAssembler::OrThenCheckNotCharacter(uc16 c,
uc16 mask,
Label* on_mismatch) {
Emit(BC_OR_CHECK_NOT_CHAR);
Emit16(c);
Emit16(mask);
EmitOrLink(on_mismatch);
}
// Emits BC_MINUS_OR_CHECK_NOT_CHAR, the 16-bit character |c|, the 16-bit
// |mask|, and a 32-bit reference to the |on_mismatch| label.
void IrregexpAssembler::MinusOrThenCheckNotCharacter(uc16 c,
uc16 mask,
Label* on_mismatch) {
Emit(BC_MINUS_OR_CHECK_NOT_CHAR);
Emit16(c);
Emit16(mask);
EmitOrLink(on_mismatch);
}
// Emits BC_CHECK_LT, the 16-bit |limit|, and a 32-bit reference to the
// |on_less| label.
void IrregexpAssembler::CheckCharacterLT(uc16 limit, Label* on_less) {
Emit(BC_CHECK_LT);
Emit16(limit);
EmitOrLink(on_less);
}
// Emits BC_CHECK_GT, the 16-bit |limit|, and a 32-bit reference to the
// |on_greater| label.
void IrregexpAssembler::CheckCharacterGT(uc16 limit, Label* on_greater) {
Emit(BC_CHECK_GT);
Emit16(limit);
EmitOrLink(on_greater);
}
// Emits BC_CHECK_NOT_BACK_REF, |capture_index| as a single byte, and a
// 32-bit reference to the |on_mismatch| label.
void IrregexpAssembler::CheckNotBackReference(int capture_index,
Label* on_mismatch) {
Emit(BC_CHECK_NOT_BACK_REF);
Emit(capture_index);
EmitOrLink(on_mismatch);
}
// Shared encoder for the register comparison bytecodes. Emits the opcode
// |byte_code|, |reg_index| as a single byte, the 16-bit comparand |vs|, and a
// 32-bit reference to the |on_true| label.
void IrregexpAssembler::CheckRegister(int byte_code,
int reg_index,
uint16_t vs,
Label* on_true) {
Emit(byte_code);
Emit(reg_index);
Emit16(vs);
EmitOrLink(on_true);
}
// Emits a register comparison using the BC_CHECK_REGISTER_LT opcode.
void IrregexpAssembler::CheckRegisterLT(int reg_index,
uint16_t vs,
Label* on_less_than) {
CheckRegister(BC_CHECK_REGISTER_LT, reg_index, vs, on_less_than);
}
// Emits a register comparison using the BC_CHECK_REGISTER_GE opcode.
void IrregexpAssembler::CheckRegisterGE(int reg_index,
uint16_t vs,
Label* on_greater_than_equal) {
CheckRegister(BC_CHECK_REGISTER_GE, reg_index, vs, on_greater_than_equal);
}
// Emits BC_LOOKUP_MAP1, the 16-bit |start| value, and 32-bit references to
// the |bit_map| and |on_zero| labels, in that order.
void IrregexpAssembler::LookupMap1(uc16 start, Label* bit_map, Label* on_zero) {
Emit(BC_LOOKUP_MAP1);
Emit16(start);
EmitOrLink(bit_map);
EmitOrLink(on_zero);
}
// Emits BC_LOOKUP_MAP2, the 16-bit |start| value, a reference to the
// |half_nibble_map| label, and then one label reference per entry of
// |table|, which must hold between 1 and 4 labels.
void IrregexpAssembler::LookupMap2(uc16 start,
                                   Label* half_nibble_map,
                                   const Vector<Label*>& table) {
  Emit(BC_LOOKUP_MAP2);
  Emit16(start);
  EmitOrLink(half_nibble_map);
  const int entry_count = table.length();
  ASSERT(entry_count > 0);
  ASSERT(entry_count <= 4);
  for (int entry = 0; entry < entry_count; entry++) {
    EmitOrLink(table[entry]);
  }
}
// Emits BC_LOOKUP_MAP8, the 16-bit |start| value, a reference to the
// |byte_map| label, and then one label reference per entry of |table|, which
// must hold between 1 and 256 labels.
void IrregexpAssembler::LookupMap8(uc16 start,
                                   Label* byte_map,
                                   const Vector<Label*>& table) {
  Emit(BC_LOOKUP_MAP8);
  Emit16(start);
  EmitOrLink(byte_map);
  const int entry_count = table.length();
  ASSERT(entry_count > 0);
  ASSERT(entry_count <= 256);
  for (int entry = 0; entry < entry_count; entry++) {
    EmitOrLink(table[entry]);
  }
}
// Emits BC_LOOKUP_HI_MAP8, the single-byte |start| value, a reference to the
// |byte_map| label, and then one label reference per entry of |table|, which
// must hold between 1 and 256 labels.
void IrregexpAssembler::LookupHighMap8(byte start,
                                       Label* byte_map,
                                       const Vector<Label*>& table) {
  Emit(BC_LOOKUP_HI_MAP8);
  Emit(start);
  EmitOrLink(byte_map);
  const int entry_count = table.length();
  ASSERT(entry_count > 0);
  ASSERT(entry_count <= 256);
  for (int entry = 0; entry < entry_count; entry++) {
    EmitOrLink(table[entry]);
  }
}
// Returns the number of bytes emitted so far (the current value of pc_).
int IrregexpAssembler::length() {
return pc_;
}
// Copies the bytes emitted so far to |a|. The destination must have room for
// at least length() bytes.
void IrregexpAssembler::Copy(Address a) {
  const int emitted_bytes = length();
  memcpy(a, buffer_.start(), emitted_bytes);
}
// Replaces the buffer with a larger one that the assembler allocates and
// owns, preserving the existing contents. The previous buffer is disposed of
// only if it was also allocated by the assembler.
//
// The new buffer is twice the old size, but never smaller than a fixed
// minimum. The emitters call Expand() exactly once before writing up to four
// bytes, and plain doubling does not create that room for an empty or very
// small initial buffer (0 * 2 == 0).
void IrregexpAssembler::Expand() {
  static const int kMinimumExpandedLength = 16;
  bool old_buffer_was_our_own = own_buffer_;
  Vector<byte> old_buffer = buffer_;
  int new_length = old_buffer.length() * 2;
  if (new_length < kMinimumExpandedLength) {
    new_length = kMinimumExpandedLength;
  }
  buffer_ = Vector<byte>::New(new_length);
  own_buffer_ = true;
  // Nothing to copy from an empty buffer, whose start pointer may be NULL.
  if (old_buffer.length() > 0) {
    memcpy(buffer_.start(), old_buffer.start(), old_buffer.length());
  }
  if (old_buffer_was_our_own) {
    old_buffer.Dispose();
  }
}
} } // namespace v8::internal

137
src/assembler-irregexp.h Normal file
View File

@ -0,0 +1,137 @@
// Copyright 2006-2008 the V8 project authors. All rights reserved.
// A light-weight assembler for the Irregexp byte code.
#ifndef V8_ASSEMBLER_IRREGEXP_H_
#define V8_ASSEMBLER_IRREGEXP_H_
namespace v8 { namespace internal {
// Emits Irregexp bytecode into a byte buffer. Each public operation appends
// one bytecode together with its operands.
class IrregexpAssembler {
public:
// Creates an assembler that emits bytecode into the given buffer, starting
// at its beginning.
//
// The assembler does not take ownership of the buffer passed in here. When
// that buffer becomes full, the assembler switches to a larger buffer that
// it allocates itself, copying over what has been emitted so far. Only
// buffers allocated by the assembler are deallocated upon its destruction.
explicit IrregexpAssembler(Vector<byte>);
~IrregexpAssembler();
// CP = current position in source.
// BT = backtrack label.
// Stack.
void PushCurrentPosition(int cp_offset = 0);
void PushBacktrack(Label* l);
void PushRegister(int index);
void WriteCurrentPositionToRegister(int index, int cp_offset = 0);
void ReadCurrentPositionFromRegister(int index);
void WriteStackPointerToRegister(int index);
void ReadStackPointerFromRegister(int index);
void SetRegister(int index, int value);
void AdvanceRegister(int index, int by);
void PopCurrentPosition();
void PopBacktrack();
void PopRegister(int index);
void Fail();
void Succeed();
void Break(); // This instruction will cause a fatal VM error if hit.
void Bind(Label* l); // Binds an unbound label L to the current code position.
void AdvanceCP(int by);
void GoTo(Label* l);
// Loads current char into a machine register. Jumps to the label if we
// reached the end of the subject string. Falls through otherwise.
void LoadCurrentChar(int cp_offset, Label* on_end);
// Checks current char register against a singleton.
void CheckCharacter(uc16 c, Label* on_match);
void CheckNotCharacter(uc16 c, Label* on_mismatch);
void OrThenCheckNotCharacter(uc16 c, uc16 mask, Label* on_mismatch);
void MinusOrThenCheckNotCharacter(uc16 c, uc16 mask, Label* on_mismatch);
// Used to check current char register against a range.
void CheckCharacterLT(uc16 limit, Label* on_less);
void CheckCharacterGT(uc16 limit, Label* on_greater);
// Checks current position for a match against a
// previous capture. Advances current position by the length of the capture
// iff it matches. The capture is stored in a given register and the
// register after. If a register contains -1 then the other register
// must always contain -1 and the on_mismatch label will never be called.
void CheckNotBackReference(int capture_index, Label* on_mismatch);
// Checks a register for strictly-less-than or greater-than-or-equal.
void CheckRegisterLT(int reg_index, uint16_t vs, Label* on_less_than);
void CheckRegisterGE(int reg_index, uint16_t vs, Label* on_greater_equal);
// Subtracts a 16 bit value from the current character, uses the result to
// look up in a bit array, uses the result of that to decide whether to fall
// through (on 1) or jump to the on_zero label (on 0).
void LookupMap1(uc16 start, Label* bit_map, Label* on_zero);
// Subtracts a 16 bit value from the current character, uses the result to
// look up in a 2-bit array, uses the result of that to look up in a label
// table and jumps to the label.
void LookupMap2(uc16 start,
Label* half_nibble_map,
const Vector<Label*>& table);
// Subtracts a 16 bit value from the current character, uses the result to
// look up in a byte array, uses the result of that to look up in a label
// array and jumps to the label.
void LookupMap8(uc16 start, Label* byte_map, const Vector<Label*>& table);
// Takes the high byte of the current character, uses the result to
// look up in a byte array, uses the result of that to look up in a label
// array and jumps to the label.
void LookupHighMap8(byte start, Label* byte_map, const Vector<Label*>& table);
// Code and bitmap emission.
inline void Emit32(uint32_t x);
inline void Emit16(uint32_t x);
inline void Emit(uint32_t x);
// Bytecode buffer.
// length() is the number of bytes emitted so far; Copy() copies exactly that
// many bytes to the given address.
int length();
void Copy(Address a);
// Emits a 32-bit reference to a label: its position if it is already bound,
// otherwise a link that Bind() patches later.
inline void EmitOrLink(Label* l);
private:
// Don't use this.
IrregexpAssembler() { UNREACHABLE(); }
// The buffer into which the bytecode is emitted.
Vector<byte> buffer_;
// Shared encoder behind CheckRegisterLT and CheckRegisterGE.
inline void CheckRegister(int byte_code,
int reg_index,
uint16_t vs,
Label* on_true);
// Code generation.
int pc_; // The program counter; moves forward.
// True if the assembler owns the buffer, false if buffer is external.
bool own_buffer_;
// Switches to a larger, assembler-owned buffer, keeping the emitted bytes.
void Expand();
};
} } // namespace v8::internal
#endif // V8_ASSEMBLER_IRREGEXP_H_

View File

@ -50,7 +50,8 @@ namespace v8 { namespace internal {
class Label : public ZoneObject { // LabelShadows are dynamically allocated.
public:
INLINE(Label()) { Unuse(); }
INLINE(Label())
{ Unuse(); }
INLINE(~Label()) { ASSERT(!is_linked()); }
INLINE(void Unuse()) { pos_ = 0; }
@ -82,8 +83,10 @@ class Label : public ZoneObject { // LabelShadows are dynamically allocated.
}
friend class Assembler;
friend class RegexpAssembler;
friend class Displacement;
friend class LabelShadow;
friend class IrregexpAssembler;
};

View File

@ -29,6 +29,7 @@
#include "ast.h"
#include "scopes.h"
#include "string-stream.h"
namespace v8 { namespace internal {
@ -179,4 +180,204 @@ void Visitor::VisitExpressions(ZoneList<Expression*>* expressions) {
}
// ----------------------------------------------------------------------------
// Regular expressions
// Defines RegExp<Name>::Accept for every regexp tree type. Each one forwards
// to the matching Visit<Name> method of the visitor and returns its result.
#define MAKE_ACCEPT(Name) \
void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
return visitor->Visit##Name(this, data); \
}
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
#undef MAKE_ACCEPT
// Defines the default As<Name>/Is<Name> on the RegExpTree base class for
// every tree type: As<Name> returns NULL and Is<Name> returns false.
#define MAKE_TYPE_CASE(Name) \
RegExp##Name* RegExpTree::As##Name() { \
return NULL; \
} \
bool RegExpTree::Is##Name() { return false; }
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
#undef MAKE_TYPE_CASE
// Defines the As<Name>/Is<Name> pair on each concrete type for its own name:
// As<Name> returns this and Is<Name> returns true.
#define MAKE_TYPE_CASE(Name) \
RegExp##Name* RegExp##Name::As##Name() { \
return this; \
} \
bool RegExp##Name::Is##Name() { return true; }
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
#undef MAKE_TYPE_CASE
// Definition of the static RegExpEmpty instance.
RegExpEmpty RegExpEmpty::kInstance;
// Converts regular expression trees to a simple sexp representation.
// This representation should differ from the input grammar in as many
// cases as possible. If the input and output formats were alike, an
// incorrect parse could easily look like a correct one.
class RegExpUnparser: public RegExpVisitor {
public:
RegExpUnparser();
// Writes a character range as "from" or "from-to".
void VisitCharacterRange(CharacterRange that);
// Returns the text accumulated so far as a C string.
SmartPointer<const char> ToString() { return stream_.ToCString(); }
// Declares one Visit<Name> override per regexp tree type.
#define MAKE_CASE(Name) virtual void* Visit##Name(RegExp##Name*, void* data);
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
#undef MAKE_CASE
private:
StringStream* stream() { return &stream_; }
// alloc_ is declared before stream_ because stream_ is constructed with a
// pointer to it.
HeapStringAllocator alloc_;
StringStream stream_;
};
// Constructs the output stream on top of the unparser's own allocator.
RegExpUnparser::RegExpUnparser() : stream_(&alloc_) {
}
// Writes a disjunction as "(| alt1 alt2 ...)", unparsing each alternative in
// order. Always returns NULL.
void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
  ZoneList<RegExpTree*>* alternatives = that->alternatives();
  stream()->Add("(|");
  for (int index = 0; index < alternatives->length(); index++) {
    stream()->Add(" ");
    RegExpTree* alternative = alternatives->at(index);
    alternative->Accept(this, data);
  }
  stream()->Add(")");
  return NULL;
}
// Writes an alternative as "(: node1 node2 ...)", unparsing each node in
// order. Always returns NULL.
void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) {
  ZoneList<RegExpTree*>* nodes = that->nodes();
  stream()->Add("(:");
  for (int index = 0; index < nodes->length(); index++) {
    stream()->Add(" ");
    RegExpTree* node = nodes->at(index);
    node->Accept(this, data);
  }
  stream()->Add(")");
  return NULL;
}
// Writes the lower bound of the range and, unless the range is a singleton,
// a '-' followed by the upper bound.
void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
  stream()->Add("%k", that.from());
  if (that.IsSingleton()) return;
  stream()->Add("-%k", that.to());
}
// Writes a character class as "[r1 r2 ...]" with the ranges separated by
// single spaces; a negated class gets a leading '^'. Always returns NULL.
void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that,
                                          void* data) {
  if (that->is_negated()) {
    stream()->Add("^");
  }
  stream()->Add("[");
  ZoneList<CharacterRange>* ranges = that->ranges();
  for (int index = 0; index < ranges->length(); index++) {
    const bool needs_separator = (index > 0);
    if (needs_separator) stream()->Add(" ");
    VisitCharacterRange(ranges->at(index));
  }
  stream()->Add("]");
  return NULL;
}
// Writes the '@'-prefixed notation for the assertion's type. Nothing is
// written for a type value outside the listed cases. Always returns NULL.
void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
  const char* notation = NULL;
  switch (that->type()) {
    case RegExpAssertion::START_OF_INPUT:
      notation = "@^i";
      break;
    case RegExpAssertion::END_OF_INPUT:
      notation = "@$i";
      break;
    case RegExpAssertion::START_OF_LINE:
      notation = "@^l";
      break;
    case RegExpAssertion::END_OF_LINE:
      notation = "@$l";
      break;
    case RegExpAssertion::BOUNDARY:
      notation = "@b";
      break;
    case RegExpAssertion::NON_BOUNDARY:
      notation = "@B";
      break;
  }
  if (notation != NULL) {
    stream()->Add(notation);
  }
  return NULL;
}
// Writes the atom's characters between single quotes. Always returns NULL.
void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
  stream()->Add("'");
  Vector<const uc16> characters = that->data();
  const int character_count = characters.length();
  for (int index = 0; index < character_count; index++) {
    stream()->Add("%k", characters[index]);
  }
  stream()->Add("'");
  return NULL;
}
// A text node with exactly one element is written as that element alone.
// Otherwise the elements are written as "(! elem1 elem2 ...)". Always
// returns NULL.
void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
  ZoneList<TextElement>* elements = that->elements();
  if (elements->length() == 1) {
    elements->at(0).data.u_atom->Accept(this, data);
    return NULL;
  }
  stream()->Add("(!");
  for (int index = 0; index < elements->length(); index++) {
    stream()->Add(" ");
    elements->at(index).data.u_atom->Accept(this, data);
  }
  stream()->Add(")");
  return NULL;
}
// Writes a quantifier as "(# min max g|n body)". An unbounded maximum is
// written as '-'; 'g' marks a greedy quantifier and 'n' a non-greedy one.
// Always returns NULL.
void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
  stream()->Add("(# %i ", that->min());
  if (that->max() != RegExpQuantifier::kInfinity) {
    stream()->Add("%i ", that->max());
  } else {
    stream()->Add("- ");
  }
  if (that->is_greedy()) {
    stream()->Add("g ");
  } else {
    stream()->Add("n ");
  }
  that->body()->Accept(this, data);
  stream()->Add(")");
  return NULL;
}
// Writes a capture as "(^ body)". Always returns NULL.
void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
stream()->Add("(^ ");
that->body()->Accept(this, data);
stream()->Add(")");
return NULL;
}
// Writes a lookahead as "(-> + body)" when it is positive and as
// "(-> - body)" otherwise. Always returns NULL.
void* RegExpUnparser::VisitLookahead(RegExpLookahead* that, void* data) {
  stream()->Add("(-> ");
  if (that->is_positive()) {
    stream()->Add("+ ");
  } else {
    stream()->Add("- ");
  }
  that->body()->Accept(this, data);
  stream()->Add(")");
  return NULL;
}
// Writes a back reference as "(<- index)". Always returns NULL.
void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
void* data) {
stream()->Add("(<- %i)", that->index());
return NULL;
}
// Writes the empty regexp as a single '%' character. Always returns NULL.
void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) {
stream()->Put('%');
return NULL;
}
// Returns the sexp representation of this tree, produced by running a
// RegExpUnparser over it.
SmartPointer<const char> RegExpTree::ToString() {
RegExpUnparser unparser;
Accept(&unparser, NULL);
return unparser.ToString();
}
} } // namespace v8::internal

263
src/ast.h
View File

@ -34,6 +34,7 @@
#include "token.h"
#include "variables.h"
#include "macro-assembler.h"
#include "jsregexp.h"
namespace v8 { namespace internal {
@ -1191,6 +1192,268 @@ class ThisFunction: public Expression {
};
// ----------------------------------------------------------------------------
// Regular expressions
// Abstract base class of the regular expression syntax tree nodes.
class RegExpTree: public ZoneObject {
public:
virtual ~RegExpTree() { }
// Dispatches to the visitor method that matches the node's concrete type.
virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure) = 0;
// False by default; overridden to return true by the text, character class
// and atom nodes.
virtual bool IsTextElement() { return false; }
virtual void AppendToText(RegExpText* text);
// Returns a sexp representation of the tree.
SmartPointer<const char> ToString();
// Declares As<Name>() and Is<Name>() for every tree type. The base class
// versions return NULL and false; each concrete type overrides the pair for
// its own name.
#define MAKE_ASTYPE(Name) \
virtual RegExp##Name* As##Name(); \
virtual bool Is##Name();
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
#undef MAKE_ASTYPE
};
// Tree node holding a list of alternative subtrees.
class RegExpDisjunction: public RegExpTree {
public:
explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
: alternatives_(alternatives) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
virtual RegExpDisjunction* AsDisjunction();
virtual bool IsDisjunction();
ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
private:
// The list passed to the constructor; the pointer is stored as is.
ZoneList<RegExpTree*>* alternatives_;
};
// Tree node holding an ordered list of child nodes.
class RegExpAlternative: public RegExpTree {
public:
explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes) : nodes_(nodes) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
virtual RegExpAlternative* AsAlternative();
virtual bool IsAlternative();
ZoneList<RegExpTree*>* nodes() { return nodes_; }
private:
// The list passed to the constructor; the pointer is stored as is.
ZoneList<RegExpTree*>* nodes_;
};
// Tree node holding a list of TextElements. It reports itself as a text
// element.
class RegExpText: public RegExpTree {
public:
// The element list starts out with an initial capacity of 2.
RegExpText() : elements_(2) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
virtual RegExpText* AsText();
virtual bool IsText();
virtual bool IsTextElement() { return true; }
virtual void AppendToText(RegExpText* text);
// Appends one element to the end of the list.
void AddElement(TextElement elm) { elements_.Add(elm); }
ZoneList<TextElement>* elements() { return &elements_; }
private:
ZoneList<TextElement> elements_;
};
// Tree node for an assertion; the kind of assertion is given by its Type.
class RegExpAssertion: public RegExpTree {
public:
// The kinds of assertion: start/end of line, start/end of input, and
// boundary/non-boundary.
enum Type {
START_OF_LINE, START_OF_INPUT, END_OF_LINE, END_OF_INPUT,
BOUNDARY, NON_BOUNDARY
};
explicit RegExpAssertion(Type type) : type_(type) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
virtual RegExpAssertion* AsAssertion();
virtual bool IsAssertion();
Type type() { return type_; }
private:
Type type_;
};
// Regexp AST node holding a list of character ranges plus a negation flag.
class RegExpCharacterClass: public RegExpTree {
public:
// Wraps an existing range list; the pointer is stored as given.
RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated)
: ranges_(ranges),
is_negated_(is_negated) { }
// Builds a non-negated class from a class-escape code: allocates a fresh
// range list and lets CharacterRange::AddClassEscape fill it for |type|.
explicit RegExpCharacterClass(uc16 type)
: ranges_(new ZoneList<CharacterRange>(2)),
is_negated_(false) {
CharacterRange::AddClassEscape(type, ranges_);
}
// Visitor entry point; defined out of line.
virtual void* Accept(RegExpVisitor* visitor, void* data);
// Converts this subtree into RegExpNode form; defined out of line.
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
// Per-type cast/test hooks for this node kind; defined out of line.
virtual RegExpCharacterClass* AsCharacterClass();
virtual bool IsCharacterClass();
// Overrides the RegExpTree default (false): this node counts as a text
// element.
virtual bool IsTextElement() { return true; }
// Appends this node's content to |text|; defined out of line.
virtual void AppendToText(RegExpText* text);
ZoneList<CharacterRange>* ranges() { return ranges_; }
bool is_negated() { return is_negated_; }
private:
ZoneList<CharacterRange>* ranges_;
bool is_negated_;
};
// Regexp AST node wrapping a vector of uc16 code units.
// The Vector is stored by value; whether it owns the underlying characters
// is not visible here.
class RegExpAtom: public RegExpTree {
public:
explicit RegExpAtom(Vector<const uc16> data) : data_(data) { }
// Visitor entry point; defined out of line.
virtual void* Accept(RegExpVisitor* visitor, void* data);
// Converts this subtree into RegExpNode form; defined out of line.
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
// Per-type cast/test hooks for this node kind; defined out of line.
virtual RegExpAtom* AsAtom();
virtual bool IsAtom();
// Overrides the RegExpTree default (false): this node counts as a text
// element.
virtual bool IsTextElement() { return true; }
// Appends this node's content to |text|; defined out of line.
virtual void AppendToText(RegExpText* text);
// Returns the vector given to the constructor.
Vector<const uc16> data() { return data_; }
private:
Vector<const uc16> data_;
};
// Regexp AST node that applies a repetition range [min, max] and a
// greediness flag to a body subtree.  kInfinity stands in for "no upper
// bound".
class RegExpQuantifier: public RegExpTree {
public:
RegExpQuantifier(int min, int max, bool is_greedy, RegExpTree* body)
: min_(min),
max_(max),
is_greedy_(is_greedy),
body_(body) { }
// Visitor entry point; defined out of line.
virtual void* Accept(RegExpVisitor* visitor, void* data);
// Converts this subtree into RegExpNode form; defined out of line.
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
// Static variant that takes the quantifier's components directly instead
// of reading them from an instance; defined out of line.
static RegExpNode* ToNode(int min,
int max,
bool is_greedy,
RegExpTree* body,
RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
// Per-type cast/test hooks for this node kind; defined out of line.
virtual RegExpQuantifier* AsQuantifier();
virtual bool IsQuantifier();
int min() { return min_; }
int max() { return max_; }
bool is_greedy() { return is_greedy_; }
RegExpTree* body() { return body_; }
// We just use a very large integer value as infinity because 2^30
// is infinite in practice.
static const int kInfinity = (1 << 30);
private:
int min_;
int max_;
bool is_greedy_;
RegExpTree* body_;
};
// Availability state stored on a RegExpCapture (see available() and
// set_available()); a new capture starts out as CAPTURE_AVAILABLE.
// NOTE(review): the exact meaning of the two UNREACHABLE states is decided
// by the code that sets them, which is not visible here.
enum CaptureAvailability {
CAPTURE_AVAILABLE, CAPTURE_UNREACHABLE, CAPTURE_PERMANENTLY_UNREACHABLE };
// Regexp AST node for a capture: a body subtree, a capture index and an
// availability state.
class RegExpCapture: public RegExpTree {
public:
// A new capture always starts out as CAPTURE_AVAILABLE.
explicit RegExpCapture(RegExpTree* body, int index)
: body_(body), index_(index), available_(CAPTURE_AVAILABLE) { }
// Visitor entry point; defined out of line.
virtual void* Accept(RegExpVisitor* visitor, void* data);
// Converts this subtree into RegExpNode form; defined out of line.
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
// Static variant that takes the body and index directly instead of
// reading them from an instance; defined out of line.
static RegExpNode* ToNode(RegExpTree* body,
int index,
RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
// Per-type cast/test hooks for this node kind; defined out of line.
virtual RegExpCapture* AsCapture();
virtual bool IsCapture();
RegExpTree* body() { return body_; }
int index() { return index_; }
inline CaptureAvailability available() { return available_; }
inline void set_available(CaptureAvailability availability) {
available_ = availability;
}
// Each capture index maps to a pair of adjacent registers:
// 2 * index for the start and 2 * index + 1 for the end.
static int StartRegister(int index) { return index * 2; }
static int EndRegister(int index) { return index * 2 + 1; }
private:
RegExpTree* body_;
int index_;
CaptureAvailability available_;
};
// Regexp AST node for a lookahead: a body subtree plus a flag saying
// whether the lookahead is positive.
class RegExpLookahead: public RegExpTree {
public:
RegExpLookahead(RegExpTree* body, bool is_positive)
: body_(body),
is_positive_(is_positive) { }
// Visitor entry point; defined out of line.
virtual void* Accept(RegExpVisitor* visitor, void* data);
// Converts this subtree into RegExpNode form; defined out of line.
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
// Per-type cast/test hooks for this node kind; defined out of line.
virtual RegExpLookahead* AsLookahead();
virtual bool IsLookahead();
RegExpTree* body() { return body_; }
bool is_positive() { return is_positive_; }
private:
RegExpTree* body_;
bool is_positive_;
};
// Regexp AST node for a back reference; it points at the RegExpCapture it
// refers to rather than storing an index of its own.
class RegExpBackReference: public RegExpTree {
public:
explicit RegExpBackReference(RegExpCapture* capture)
: capture_(capture) { }
// Visitor entry point; defined out of line.
virtual void* Accept(RegExpVisitor* visitor, void* data);
// Converts this subtree into RegExpNode form; defined out of line.
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
// Per-type cast/test hooks for this node kind; defined out of line.
virtual RegExpBackReference* AsBackReference();
virtual bool IsBackReference();
// Forwards to the referenced capture's index; capture_ is dereferenced
// without a null check.
int index() { return capture_->index(); }
RegExpCapture* capture() { return capture_; }
private:
RegExpCapture* capture_;
};
// Regexp AST node that carries no data.  A shared static instance is
// available through GetInstance().
class RegExpEmpty: public RegExpTree {
public:
RegExpEmpty() { }
// Visitor entry point; defined out of line.
virtual void* Accept(RegExpVisitor* visitor, void* data);
// Converts this subtree into RegExpNode form; defined out of line.
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
// Per-type cast/test hooks for this node kind; defined out of line.
virtual RegExpEmpty* AsEmpty();
virtual bool IsEmpty();
// Returns the address of the static instance declared below.
static RegExpEmpty* GetInstance() { return &kInstance; }
private:
static RegExpEmpty kInstance;
};
// Abstract visitor over the regexp AST.  MAKE_CASE declares one pure
// virtual Visit<Name>(RegExp<Name>*, void* data) method for every type
// listed by FOR_EACH_REG_EXP_TREE_TYPE, so concrete visitors must implement
// all of them.
class RegExpVisitor BASE_EMBEDDED {
public:
virtual ~RegExpVisitor() { }
#define MAKE_CASE(Name) \
virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
#undef MAKE_CASE
};
// ----------------------------------------------------------------------------
// Basic visitor
// - leaf node visitors are abstract.

View File

@ -647,7 +647,7 @@ void Builtins::Setup(bool create_heap_objects) {
// During startup it's OK to always allocate and defer GC to later.
// This simplifies things because we don't need to retry.
AlwaysAllocateScope __scope__;
code = Heap::CreateCode(desc, NULL, flags);
code = Heap::CreateCode(desc, NULL, flags, NULL);
if (code->IsFailure()) {
v8::internal::V8::FatalProcessOutOfMemory("CreateCode");
}

78
src/bytecodes-irregexp.h Normal file
View File

@ -0,0 +1,78 @@
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef V8_BYTECODES_IRREGEXP_H_
#define V8_BYTECODES_IRREGEXP_H_
namespace v8 { namespace internal {
// Table of Irregexp byte codes.  Every row is V(name, opcode, length):
//   name   - suffix used to form the BC_<name> constants below,
//   opcode - the byte value that identifies the instruction,
//   length - total instruction size in bytes, opcode byte included.
// The trailing comment on each row lists the instruction's operands.
// NOTE(review): the length 99 on the LOOKUP_MAP2/MAP8/HI_MAP8 rows looks like
// a placeholder for variable-length instructions -- confirm before relying on
// BC_<name>_LENGTH for those three.
//
// The last row deliberately has no line-continuation backslash, so the macro
// body ends there and cannot swallow whatever line follows it.
#define BYTECODE_ITERATOR(V) \
  V(BREAK, 0, 1) /* break */ \
  V(PUSH_CP, 1, 5) /* push_cp offset32 */ \
  V(PUSH_BT, 2, 5) /* push_bt addr32 */ \
  V(PUSH_REGISTER, 3, 2) /* push_register register_index */ \
  V(SET_REGISTER_TO_CP, 4, 6) /* set_register_to_cp register_index offset32 */ \
  V(SET_CP_TO_REGISTER, 5, 2) /* set_cp_to_register register_index */ \
  V(SET_REGISTER_TO_SP, 6, 2) /* set_register_to_sp register_index */ \
  V(SET_SP_TO_REGISTER, 7, 2) /* set_sp_to_register register_index */ \
  V(SET_REGISTER, 8, 6) /* set_register register_index value32 */ \
  V(ADVANCE_REGISTER, 9, 6) /* advance_register register_index value32 */ \
  V(POP_CP, 10, 1) /* pop_cp */ \
  V(POP_BT, 11, 1) /* pop_bt */ \
  V(POP_REGISTER, 12, 2) /* pop_register register_index */ \
  V(FAIL, 13, 1) /* fail */ \
  V(SUCCEED, 14, 1) /* succeed */ \
  V(ADVANCE_CP, 15, 5) /* advance_cp offset32 */ \
  V(GOTO, 16, 5) /* goto addr32 */ \
  V(LOAD_CURRENT_CHAR, 17, 9) /* load offset32 addr32 */ \
  V(CHECK_CHAR, 18, 7) /* check_char uc16 addr32 */ \
  V(CHECK_NOT_CHAR, 19, 7) /* check_not_char uc16 addr32 */ \
  V(OR_CHECK_NOT_CHAR, 20, 9) /* or_check_not_char uc16 uc16 addr32 */ \
  V(MINUS_OR_CHECK_NOT_CHAR, 21, 9) /* minus_or_check_not_char uc16 uc16 addr32 */ \
  V(CHECK_LT, 22, 7) /* check_lt uc16 addr32 */ \
  V(CHECK_GT, 23, 7) /* check_gt uc16 addr32 */ \
  V(CHECK_NOT_BACK_REF, 24, 6) /* check_not_back_ref capture_idx addr32 */ \
  V(LOOKUP_MAP1, 25, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
  V(LOOKUP_MAP2, 26, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
  V(LOOKUP_MAP8, 27, 99) /* l_map8 start16 byte_map addr32* */ \
  V(LOOKUP_HI_MAP8, 28, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
  V(CHECK_REGISTER_LT, 29, 8) /* check_reg_lt register_index value16 addr32 */ \
  V(CHECK_REGISTER_GE, 30, 8) /* check_reg_ge register_index value16 addr32 */

// Declares one constant BC_<name> per row, holding the opcode value.
#define DECLARE_BYTECODES(name, code, length) \
  static const int BC_##name = code;
BYTECODE_ITERATOR(DECLARE_BYTECODES)
#undef DECLARE_BYTECODES

// Declares one constant BC_<name>_LENGTH per row, holding the instruction
// length in bytes.
#define DECLARE_BYTECODE_LENGTH(name, code, length) \
  static const int BC_##name##_LENGTH = length;
BYTECODE_ITERATOR(DECLARE_BYTECODE_LENGTH)
#undef DECLARE_BYTECODE_LENGTH
} }
#endif // V8_BYTECODES_IRREGEXP_H_

View File

@ -237,12 +237,14 @@ template <int> class StaticAssertionHelper { };
// The ASSERT macro is equivalent to CHECK except that it only
// generates code in debug builds. Ditto STATIC_ASSERT.
#ifdef DEBUG
#define ASSERT_RESULT(expr) CHECK(expr)
#define ASSERT(condition) CHECK(condition)
#define ASSERT_EQ(v1, v2) CHECK_EQ(v1, v2)
#define ASSERT_NE(v1, v2) CHECK_NE(v1, v2)
#define STATIC_ASSERT(test) STATIC_CHECK(test)
#define SLOW_ASSERT(condition) if (FLAG_enable_slow_asserts) CHECK(condition)
#else
#define ASSERT_RESULT(expr) (expr)
#define ASSERT(condition) ((void) 0)
#define ASSERT_EQ(v1, v2) ((void) 0)
#define ASSERT_NE(v1, v2) ((void) 0)
@ -256,4 +258,6 @@ template <int> class StaticAssertionHelper { };
#define ASSERT_SIZE_TAG_ALIGNED(size) ASSERT((size & kHeapObjectTagMask) == 0)
#define ASSERT_NOT_NULL(p) ASSERT_NE(NULL, p)
#endif // V8_CHECKS_H_

View File

@ -120,7 +120,7 @@ typedef int32_t instr_t;
// bits.
//
// bool InstructionSetsConditionCodes(byte* ptr) {
// Instr *instr = Instr::At(ptr);
// Instr* instr = Instr::At(ptr);
// int type = instr->TypeField();
// return ((type == 0) || (type == 1)) && instr->HasS();
// }

View File

@ -170,9 +170,9 @@ Handle<Proxy> Factory::NewProxy(const AccessorDescriptor* desc) {
}
Handle<ByteArray> Factory::NewByteArray(int length) {
Handle<ByteArray> Factory::NewByteArray(int length, PretenureFlag pretenure) {
ASSERT(0 <= length);
CALL_HEAP_FUNCTION(Heap::AllocateByteArray(length), ByteArray);
CALL_HEAP_FUNCTION(Heap::AllocateByteArray(length, pretenure), ByteArray);
}
@ -457,9 +457,15 @@ Handle<JSFunction> Factory::NewFunctionWithPrototype(Handle<String> name,
}
// Creates a Code object through Heap::CreateCode, handing it the location of
// |self_ref| (reinterpreted as a Code**) as the self-reference slot.
// CALL_HEAP_FUNCTION wraps the heap call and produces the Handle<Code>
// result.
Handle<Code> Factory::NewCode(const CodeDesc& desc, ScopeInfo<>* sinfo,
                              Code::Flags flags, Handle<Object> self_ref) {
  Code** self_slot = reinterpret_cast<Code**>(self_ref.location());
  CALL_HEAP_FUNCTION(Heap::CreateCode(desc, sinfo, flags, self_slot), Code);
}
Handle<Code> Factory::NewCode(const CodeDesc& desc, ScopeInfo<>* sinfo,
Code::Flags flags) {
CALL_HEAP_FUNCTION(Heap::CreateCode(desc, sinfo, flags), Code);
CALL_HEAP_FUNCTION(Heap::CreateCode(desc, sinfo, flags, NULL), Code);
}
@ -706,8 +712,11 @@ Handle<JSFunction> Factory::CreateApiFunction(
ASSERT(type != INVALID_TYPE);
Handle<JSFunction> result =
Factory::NewFunction(Factory::empty_symbol(), type, instance_size,
code, true);
Factory::NewFunction(Factory::empty_symbol(),
type,
instance_size,
code,
true);
// Set class name.
Handle<Object> class_name = Handle<Object>(obj->class_name());
if (class_name->IsString()) {

View File

@ -147,7 +147,8 @@ class Factory : public AllStatic {
// the old generation).
static Handle<Proxy> NewProxy(const AccessorDescriptor* proxy);
static Handle<ByteArray> NewByteArray(int length);
static Handle<ByteArray> NewByteArray(int length,
PretenureFlag pretenure = NOT_TENURED);
static Handle<Map> NewMap(InstanceType type, int instance_size);
@ -205,6 +206,9 @@ class Factory : public AllStatic {
Handle<JSFunction> boilerplate,
Handle<Context> context);
static Handle<Code> NewCode(const CodeDesc& desc, ScopeInfo<>* sinfo,
Code::Flags flags, Handle<Object> self_reference);
static Handle<Code> NewCode(const CodeDesc& desc, ScopeInfo<>* sinfo,
Code::Flags flags);

View File

@ -289,6 +289,12 @@ DEFINE_bool(collect_heap_spill_statistics, false,
"report heap spill statistics along with heap_stats "
"(requires heap_stats)")
// Irregexp (new regular expression implementation) flags; all default to
// false.
DEFINE_bool(irregexp, false, "new regular expression code")
DEFINE_bool(trace_regexps, false, "trace Irregexp execution")
DEFINE_bool(trace_regexp_bytecodes, false,
            "trace Irregexp bytecode execution")
DEFINE_bool(attempt_case_independent, false,
            "attempt to run Irregexp case independent")
DEFINE_bool(irregexp_native, false,
            "use native code Irregexp implementation (IA32 only)")
//
// Logging and profiling only flags
//

View File

@ -178,10 +178,16 @@ class Map;
class MapSpace;
class MarkCompactCollector;
class NewSpace;
class NodeVisitor;
class Object;
class OldSpace;
class Property;
class Proxy;
class RegExpNode;
struct RegExpParseResult;
class RegExpTree;
class RegExpCompiler;
class RegExpVisitor;
class Scope;
template<class Allocator = FreeStoreAllocationPolicy> class ScopeInfo;
class Script;

View File

@ -392,8 +392,7 @@ void Heap::PerformGarbageCollection(AllocationSpace space,
}
Counters::objs_since_last_young.Set(0);
// Process weak handles post gc.
GlobalHandles::PostGarbageCollectionProcessing();
PostGarbageCollectionProcessing();
if (collector == MARK_COMPACTOR) {
// Register the amount of external allocated memory.
@ -408,6 +407,14 @@ void Heap::PerformGarbageCollection(AllocationSpace space,
}
// Runs the work that has to happen once a garbage collection has finished.
// It forwards to the PostGarbageCollectionProcessing hooks of GlobalHandles
// and FlatStringReader, in that order.
void Heap::PostGarbageCollectionProcessing() {
// Process weak handles post gc.
GlobalHandles::PostGarbageCollectionProcessing();
// Update flat string readers.
FlatStringReader::PostGarbageCollectionProcessing();
}
void Heap::MarkCompact(GCTracer* tracer) {
gc_state_ = MARK_COMPACT;
mc_count_++;
@ -1582,6 +1589,24 @@ Object* Heap::LookupSingleCharacterStringFromCode(uint16_t code) {
}
// Allocates a byte array with |length| elements.
// NOT_TENURED requests are forwarded to the single-argument overload.  Any
// other request is allocated in OLD_DATA_SPACE, or in LO_SPACE when the
// object size exceeds MaxHeapObjectSize().  Returns the failure object from
// AllocateRaw unchanged if the allocation fails.
Object* Heap::AllocateByteArray(int length, PretenureFlag pretenure) {
  if (pretenure == NOT_TENURED) return AllocateByteArray(length);

  int size = ByteArray::SizeFor(length);
  AllocationSpace space = OLD_DATA_SPACE;
  if (size > MaxHeapObjectSize()) space = LO_SPACE;

  Object* result = AllocateRaw(size, space, OLD_DATA_SPACE);
  if (result->IsFailure()) return result;

  // Initialize the header fields of the freshly allocated object.
  Array* array = reinterpret_cast<Array*>(result);
  array->set_map(byte_array_map());
  array->set_length(length);
  return result;
}
Object* Heap::AllocateByteArray(int length) {
int size = ByteArray::SizeFor(length);
AllocationSpace space =
@ -1599,7 +1624,8 @@ Object* Heap::AllocateByteArray(int length) {
Object* Heap::CreateCode(const CodeDesc& desc,
ScopeInfo<>* sinfo,
Code::Flags flags) {
Code::Flags flags,
Code** self_reference) {
// Compute size
int body_size = RoundUp(desc.instr_size + desc.reloc_size, kObjectAlignment);
int sinfo_size = 0;
@ -1622,7 +1648,16 @@ Object* Heap::CreateCode(const CodeDesc& desc,
code->set_sinfo_size(sinfo_size);
code->set_flags(flags);
code->set_ic_flag(Code::IC_TARGET_IS_ADDRESS);
code->CopyFrom(desc); // migrate generated code
// Allow self references to created code object.
if (self_reference != NULL) {
*self_reference = code;
}
// Migrate generated code.
// The generated code can contain Object** values (typically from handles)
// that are dereferenced during the copy to point directly to the actual heap
// objects. These pointers can include references to the code object itself,
// through the self_reference parameter.
code->CopyFrom(desc);
if (sinfo != NULL) sinfo->Serialize(code); // write scope info
#ifdef DEBUG

View File

@ -391,7 +391,13 @@ class Heap : public AllStatic {
// Allocate a byte array of the specified length
// Returns Failure::RetryAfterGC(requested_bytes, space) if the allocation
// failed.
// Please not this does not perform a garbage collection.
// Please note this does not perform a garbage collection.
static Object* AllocateByteArray(int length, PretenureFlag pretenure);
// Allocate a non-tenured byte array of the specified length
// Returns Failure::RetryAfterGC(requested_bytes, space) if the allocation
// failed.
// Please note this does not perform a garbage collection.
static Object* AllocateByteArray(int length);
// Allocates a fixed array initialized with undefined values
@ -549,11 +555,14 @@ class Heap : public AllStatic {
// Makes a new native code object
// Returns Failure::RetryAfterGC(requested_bytes, space) if the allocation
// failed.
// failed. On success, the pointer to the Code object is stored in the
// self_reference. This allows generated code to reference its own Code
// object by containing this pointer.
// Please note this function does not perform a garbage collection.
static Object* CreateCode(const CodeDesc& desc,
ScopeInfo<>* sinfo,
Code::Flags flags);
Code::Flags flags,
Code** self_reference = NULL);
static Object* CopyCode(Code* code);
// Finds the symbol for string in the symbol table.
@ -582,6 +591,9 @@ class Heap : public AllStatic {
static void GarbageCollectionPrologue();
static void GarbageCollectionEpilogue();
// Code that should be executed after the garbage collection proper.
static void PostGarbageCollectionProcessing();
// Performs garbage collection operation.
// Returns whether required_space bytes are available after the collection.
static bool CollectGarbage(int required_space, AllocationSpace space);

347
src/interpreter-irregexp.cc Normal file
View File

@ -0,0 +1,347 @@
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// A simple interpreter for the Irregexp byte code.
#include "v8.h"
#include "utils.h"
#include "ast.h"
#include "bytecodes-irregexp.h"
#include "interpreter-irregexp.h"
namespace v8 { namespace internal {
#ifdef DEBUG
// Prints one trace line for the instruction at |pc| when
// FLAG_trace_regexp_bytecodes is set; does nothing otherwise.
//   code_base        - start of the byte code, used to print pc as an offset
//   pc               - address of the instruction being traced
//   stack_depth      - number of entries on the backtrack stack
//   current_position - current position in the subject
//   bytecode_length  - instruction length in bytes, opcode byte included
//   bytecode_name    - printable name of the instruction
// The operand bytes pc[1] .. pc[bytecode_length - 1] are dumped in hex.
static void TraceInterpreter(const byte* code_base,
                             const byte* pc,
                             int stack_depth,
                             int current_position,
                             int bytecode_length,
                             const char* bytecode_name) {
  if (FLAG_trace_regexp_bytecodes) {
    // The pointer difference is a ptrdiff_t; convert it explicitly so the
    // argument matches the %x conversion on every platform.
    PrintF("pc = %02x, sp = %d, current = %d, bc = %s",
           static_cast<unsigned>(pc - code_base),
           stack_depth,
           current_position,
           bytecode_name);
    // Use PrintF for the whole line so all of the trace output goes through
    // the same printing routine.
    for (int i = 1; i < bytecode_length; i++) {
      PrintF(", %02x", pc[i]);
    }
    PrintF("\n");
  }
}
// BYTECODE(name) introduces the switch case for instruction BC_<name>.
// In DEBUG builds it additionally calls TraceInterpreter, so it relies on
// the locals code_base, pc, backtrack_sp, backtrack_stack and current being
// in scope where it is used.
# define BYTECODE(name) case BC_##name: \
TraceInterpreter(code_base, \
pc, \
backtrack_sp - backtrack_stack, \
current, \
BC_##name##_LENGTH, \
#name);
#else
// Without DEBUG, BYTECODE(name) is only the case label.
# define BYTECODE(name) case BC_##name: // NOLINT
#endif
// Interprets Irregexp byte code against a two-byte subject string.
//   code_base - start of the byte code; all addr32 operands are offsets
//               from this address
//   subject   - the characters to match against
//   registers - caller-provided register file, indexed by the one-byte
//               register operands in the byte code
//   current   - position in |subject| at which matching starts
// Returns true when a SUCCEED instruction is reached.  Returns false on
// FAIL, when the fixed-size backtrack stack would overflow, or when an
// invalid instruction is met.
static bool RawMatch(const byte* code_base,
                     Vector<const uc16> subject,
                     int* registers,
                     int current) {
  const byte* pc = code_base;
  static const int kBacktrackStackSize = 10000;
  int backtrack_stack[kBacktrackStackSize];
  // Number of free slots left on the backtrack stack.
  int backtrack_stack_space = kBacktrackStackSize;
  int* backtrack_sp = backtrack_stack;
  // Character most recently loaded by LOAD_CURRENT_CHAR.
  int current_char = -1;
#ifdef DEBUG
  if (FLAG_trace_regexp_bytecodes) {
    PrintF("\n\nStart bytecode interpreter\n\n");
  }
#endif
  while (true) {
    switch (*pc) {
      BYTECODE(BREAK)
        UNREACHABLE();
        return false;
      BYTECODE(PUSH_CP)
        if (--backtrack_stack_space < 0) {
          return false;  // No match on backtrack stack overflow.
        }
        *backtrack_sp++ = current + Load32(pc + 1);
        pc += BC_PUSH_CP_LENGTH;
        break;
      BYTECODE(PUSH_BT)
        if (--backtrack_stack_space < 0) {
          return false;  // No match on backtrack stack overflow.
        }
        *backtrack_sp++ = Load32(pc + 1);
        pc += BC_PUSH_BT_LENGTH;
        break;
      BYTECODE(PUSH_REGISTER)
        if (--backtrack_stack_space < 0) {
          return false;  // No match on backtrack stack overflow.
        }
        *backtrack_sp++ = registers[pc[1]];
        pc += BC_PUSH_REGISTER_LENGTH;
        break;
      BYTECODE(SET_REGISTER)
        registers[pc[1]] = Load32(pc + 2);
        pc += BC_SET_REGISTER_LENGTH;
        break;
      BYTECODE(ADVANCE_REGISTER)
        registers[pc[1]] += Load32(pc + 2);
        pc += BC_ADVANCE_REGISTER_LENGTH;
        break;
      BYTECODE(SET_REGISTER_TO_CP)
        registers[pc[1]] = current + Load32(pc + 2);
        pc += BC_SET_REGISTER_TO_CP_LENGTH;
        break;
      BYTECODE(SET_CP_TO_REGISTER)
        current = registers[pc[1]];
        pc += BC_SET_CP_TO_REGISTER_LENGTH;
        break;
      BYTECODE(SET_REGISTER_TO_SP)
        registers[pc[1]] = backtrack_sp - backtrack_stack;
        pc += BC_SET_REGISTER_TO_SP_LENGTH;
        break;
      BYTECODE(SET_SP_TO_REGISTER)
        backtrack_sp = backtrack_stack + registers[pc[1]];
        // Recompute the free space to match the restored stack pointer.
        backtrack_stack_space = kBacktrackStackSize -
                                (backtrack_sp - backtrack_stack);
        pc += BC_SET_SP_TO_REGISTER_LENGTH;
        break;
      BYTECODE(POP_CP)
        backtrack_stack_space++;
        --backtrack_sp;
        current = *backtrack_sp;
        pc += BC_POP_CP_LENGTH;
        break;
      BYTECODE(POP_BT)
        backtrack_stack_space++;
        --backtrack_sp;
        // The popped value is a code offset; continue executing there.
        pc = code_base + *backtrack_sp;
        break;
      BYTECODE(POP_REGISTER)
        backtrack_stack_space++;
        --backtrack_sp;
        registers[pc[1]] = *backtrack_sp;
        pc += BC_POP_REGISTER_LENGTH;
        break;
      BYTECODE(FAIL)
        return false;
      BYTECODE(SUCCEED)
        return true;
      BYTECODE(ADVANCE_CP)
        current += Load32(pc + 1);
        pc += BC_ADVANCE_CP_LENGTH;
        break;
      BYTECODE(GOTO)
        pc = code_base + Load32(pc + 1);
        break;
      BYTECODE(LOAD_CURRENT_CHAR) {
        int pos = current + Load32(pc + 1);
        // Any position outside the subject, before its start as well as
        // past its end, takes the out-of-range branch instead of indexing
        // the subject out of bounds.
        if (pos < 0 || pos >= subject.length()) {
          pc = code_base + Load32(pc + 5);
        } else {
          current_char = subject[pos];
          pc += BC_LOAD_CURRENT_CHAR_LENGTH;
        }
        break;
      }
      BYTECODE(CHECK_CHAR) {
        int c = Load16(pc + 1);
        if (c == current_char) {
          pc = code_base + Load32(pc + 3);
        } else {
          pc += BC_CHECK_CHAR_LENGTH;
        }
        break;
      }
      BYTECODE(CHECK_NOT_CHAR) {
        int c = Load16(pc + 1);
        if (c != current_char) {
          pc = code_base + Load32(pc + 3);
        } else {
          pc += BC_CHECK_NOT_CHAR_LENGTH;
        }
        break;
      }
      BYTECODE(OR_CHECK_NOT_CHAR) {
        int c = Load16(pc + 1);
        if (c != (current_char | Load16(pc + 3))) {
          pc = code_base + Load32(pc + 5);
        } else {
          pc += BC_OR_CHECK_NOT_CHAR_LENGTH;
        }
        break;
      }
      BYTECODE(MINUS_OR_CHECK_NOT_CHAR) {
        int c = Load16(pc + 1);
        int m = Load16(pc + 3);
        if (c != ((current_char - m) | m)) {
          pc = code_base + Load32(pc + 5);
        } else {
          pc += BC_MINUS_OR_CHECK_NOT_CHAR_LENGTH;
        }
        break;
      }
      BYTECODE(CHECK_LT) {
        int limit = Load16(pc + 1);
        if (current_char < limit) {
          pc = code_base + Load32(pc + 3);
        } else {
          pc += BC_CHECK_LT_LENGTH;
        }
        break;
      }
      BYTECODE(CHECK_GT) {
        int limit = Load16(pc + 1);
        if (current_char > limit) {
          pc = code_base + Load32(pc + 3);
        } else {
          pc += BC_CHECK_GT_LENGTH;
        }
        break;
      }
      BYTECODE(CHECK_REGISTER_LT)
        if (registers[pc[1]] < Load16(pc + 2)) {
          pc = code_base + Load32(pc + 4);
        } else {
          pc += BC_CHECK_REGISTER_LT_LENGTH;
        }
        break;
      BYTECODE(CHECK_REGISTER_GE)
        if (registers[pc[1]] >= Load16(pc + 2)) {
          pc = code_base + Load32(pc + 4);
        } else {
          pc += BC_CHECK_REGISTER_GE_LENGTH;
        }
        break;
      BYTECODE(LOOKUP_MAP1) {
        // Look up character in a bitmap.  If we find a 0, then jump to the
        // location at pc + 7.  Otherwise fall through!
        int index = current_char - Load16(pc + 1);
        byte map = code_base[Load32(pc + 3) + (index >> 3)];
        map = ((map >> (index & 7)) & 1);
        if (map == 0) {
          pc = code_base + Load32(pc + 7);
        } else {
          pc += BC_LOOKUP_MAP1_LENGTH;
        }
        break;
      }
      BYTECODE(LOOKUP_MAP2) {
        // Look up character in a half-nibble map.  If we find 00, then jump
        // to the location at pc + 7.  If we find 01 then jump to location at
        // pc + 11, etc.
        int index = (current_char - Load16(pc + 1)) << 1;
        byte map = code_base[Load32(pc + 3) + (index >> 3)];
        map = ((map >> (index & 7)) & 3);
        if (map < 2) {
          if (map == 0) {
            pc = code_base + Load32(pc + 7);
          } else {
            pc = code_base + Load32(pc + 11);
          }
        } else {
          if (map == 2) {
            pc = code_base + Load32(pc + 15);
          } else {
            pc = code_base + Load32(pc + 19);
          }
        }
        break;
      }
      BYTECODE(LOOKUP_MAP8) {
        // Look up character in a byte map.  Use the byte as an index into a
        // table that follows this instruction immediately.
        int index = current_char - Load16(pc + 1);
        byte map = code_base[Load32(pc + 3) + index];
        const byte* new_pc = code_base + Load32(pc + 7) + (map << 2);
        pc = code_base + Load32(new_pc);
        break;
      }
      BYTECODE(LOOKUP_HI_MAP8) {
        // Look up high byte of this character in a byte map.  Use the byte
        // as an index into a table that follows this instruction
        // immediately.
        int index = (current_char >> 8) - pc[1];
        byte map = code_base[Load32(pc + 2) + index];
        const byte* new_pc = code_base + Load32(pc + 6) + (map << 2);
        pc = code_base + Load32(new_pc);
        break;
      }
      BYTECODE(CHECK_NOT_BACK_REF) {
        // The captured range is read from two adjacent registers: the one
        // named by the operand and the one after it.
        int from = registers[pc[1]];
        int len = registers[pc[1] + 1] - from;
        if (current + len > subject.length()) {
          // Not enough input left to hold the captured text.
          pc = code_base + Load32(pc + 2);
          break;
        } else {
          int i;
          for (i = 0; i < len; i++) {
            if (subject[from + i] != subject[current + i]) {
              pc = code_base + Load32(pc + 2);
              break;
            }
          }
          // A mismatch already redirected pc; leave the switch.
          if (i < len) break;
          current += len;
        }
        pc += BC_CHECK_NOT_BACK_REF_LENGTH;
        break;
      }
      default:
        UNREACHABLE();
        // If UNREACHABLE() returns (for instance when it is compiled out),
        // report "no match" as the BREAK case does.  Breaking out of the
        // switch here would dispatch the same invalid instruction again
        // forever, because pc has not moved.
        return false;
    }
  }
}
// Entry point of the byte code interpreter.  Asserts that |subject16| has a
// flat two-byte representation, then runs RawMatch over its characters using
// the byte code stored in |code_array|.  Returns RawMatch's result.
bool IrregexpInterpreter::Match(Handle<ByteArray> code_array,
                                Handle<String> subject16,
                                int* registers,
                                int start_position) {
  ASSERT(StringShape(*subject16).IsTwoByteRepresentation());
  ASSERT(subject16->IsFlat(StringShape(*subject16)));

  // Raw pointers into heap objects are taken below.
  // NOTE(review): presumably this scope object guards against allocation
  // while they are live -- confirm against AssertNoAllocation.
  AssertNoAllocation no_allocation;

  const byte* code_base = code_array->GetDataStartAddress();
  Vector<const uc16> subject_chars(subject16->GetTwoByteData(),
                                   subject16->length());
  return RawMatch(code_base, subject_chars, registers, start_position);
}
} } // namespace v8::internal

View File

@ -0,0 +1,47 @@
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// A simple interpreter for the Irregexp byte code.
#ifndef V8_INTERPRETER_IRREGEXP_H_
#define V8_INTERPRETER_IRREGEXP_H_
namespace v8 { namespace internal {
// Static-only interface to the Irregexp byte code interpreter.
class IrregexpInterpreter {
public:
// Runs the byte code in |code| against |subject16|, starting at
// |start_position|, and returns whether it matched.
// |captures| is the int array the interpreter uses as its register file.
// NOTE(review): the "16" suffix suggests a two-byte string is expected --
// confirm against the definition.
static bool Match(Handle<ByteArray> code,
Handle<String> subject16,
int* captures,
int start_position);
};
} } // namespace v8::internal
#endif // V8_INTERPRETER_IRREGEXP_H_

266
src/jsregexp-inl.h Normal file
View File

@ -0,0 +1,266 @@
// Copyright 2006-2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef V8_JSREGEXP_INL_H_
#define V8_JSREGEXP_INL_H_
#include "jsregexp.h"
#include "regexp-macro-assembler.h"
namespace v8 {
namespace internal {
// Inserts |key| with the value C::kNoValue unless it is already present.
// |locator| is always bound to the node holding |key|.  Returns true when a
// new node was created and false when the key was already in the tree.
template <typename C>
bool ZoneSplayTree<C>::Insert(const Key& key, Locator* locator) {
  if (!is_empty()) {
    // Bring the last node on the search path for |key| to the root.
    Splay(key);
    int order = C::Compare(key, root_->key_);
    if (order == 0) {
      // Repeated insertion of an existing key is ignored.
      locator->bind(root_);
      return false;
    }
    // The new node becomes the root; the old root goes to whichever side
    // keeps the ordering and gives up its subtree on the other side.
    Node* fresh = new Node(key, C::kNoValue);
    if (order > 0) {
      fresh->left_ = root_;
      fresh->right_ = root_->right_;
      root_->right_ = NULL;
    } else {
      fresh->right_ = root_;
      fresh->left_ = root_->left_;
      root_->left_ = NULL;
    }
    root_ = fresh;
  } else {
    // Empty tree: the new node is the whole tree.
    root_ = new Node(key, C::kNoValue);
  }
  locator->bind(root_);
  return true;
}
// Looks up |key|.  On a hit, binds |locator| to the matching node and
// returns true; otherwise returns false and leaves |locator| untouched.
// The tree is splayed on |key| whenever it is non-empty.
template <typename C>
bool ZoneSplayTree<C>::Find(const Key& key, Locator* locator) {
  if (is_empty()) return false;
  Splay(key);
  if (C::Compare(key, root_->key_) != 0) return false;
  locator->bind(root_);
  return true;
}
// Binds |locator| to the node with the greatest key that does not compare
// greater than |key| (a node whose key compares equal is accepted too).
// Returns false when the tree is empty or no such node exists.
template <typename C>
bool ZoneSplayTree<C>::FindGreatestLessThan(const Key& key,
                                            Locator* locator) {
  if (is_empty()) return false;
  // Move the node with the given key, or the last node on the search path,
  // to the top of the tree.
  Splay(key);
  // The answer is now the root itself or the greatest node of its left
  // subtree.
  if (C::Compare(root_->key_, key) <= 0) {
    locator->bind(root_);
    return true;
  }
  // Temporarily treat the left subtree as the whole tree so FindGreatest
  // can search it, then put the real root back.
  Node* saved_root = root_;
  root_ = saved_root->left_;
  bool found = FindGreatest(locator);
  root_ = saved_root;
  return found;
}
// Binds |locator| to the node with the least key that compares greater
// than or equal to |key|.  Returns false if the tree is empty or holds no
// such key.
template <typename C>
bool ZoneSplayTree<C>::FindLeastGreaterThan(const Key& key,
                                            Locator* locator) {
  if (is_empty()) return false;
  // After splaying, the root holds either the key itself or the last node
  // on the search path for it.
  Splay(key);
  if (C::Compare(root_->key_, key) >= 0) {
    // The root itself qualifies.
    locator->bind(root_);
    return true;
  }
  // Otherwise the answer is the least node of the right subtree.
  // Temporarily treat that subtree as the whole tree to reuse FindLeast,
  // then restore the real root.
  Node* saved_root = root_;
  root_ = saved_root->right_;
  const bool found = FindLeast(locator);
  root_ = saved_root;
  return found;
}
// Binds |locator| to the rightmost node of the tree, i.e. the node with
// the greatest key.  Returns false if the tree is empty.
template <typename C>
bool ZoneSplayTree<C>::FindGreatest(Locator* locator) {
  if (is_empty()) return false;
  Node* rightmost = root_;
  for (Node* next = rightmost->right_; next != NULL; next = next->right_) {
    rightmost = next;
  }
  locator->bind(rightmost);
  return true;
}
// Binds |locator| to the leftmost node of the tree, i.e. the node with
// the least key.  Returns false if the tree is empty.
template <typename C>
bool ZoneSplayTree<C>::FindLeast(Locator* locator) {
  if (is_empty()) return false;
  Node* leftmost = root_;
  for (Node* next = leftmost->left_; next != NULL; next = next->left_) {
    leftmost = next;
  }
  locator->bind(leftmost);
  return true;
}
// Unlinks the node holding |key| from the tree.  Returns true if such a
// node existed and false otherwise (including for an empty tree).  The
// node itself is not deleted here.
template <typename C>
bool ZoneSplayTree<C>::Remove(const Key& key) {
  if (is_empty()) return false;
  // Move the node with the given key, if any, to the top.
  Splay(key);
  if (C::Compare(key, root_->key_) != 0) return false;

  Node* left_subtree = root_->left_;
  Node* right_subtree = root_->right_;
  if (left_subtree == NULL) {
    // Without a left child the right child is the whole remaining tree.
    root_ = right_subtree;
    return true;
  }
  // Promote the left subtree and splay it on the removed key so that its
  // new root ends up with an empty right child, then attach the original
  // right subtree there.
  root_ = left_subtree;
  Splay(key);
  root_->right_ = right_subtree;
  return true;
}
// Splays the tree on |key|: after the call the root is the node whose key
// compares equal to |key| or, when no node matches, the last node reached
// on the search path for |key|.  Does nothing on an empty tree.
template <typename C>
void ZoneSplayTree<C>::Splay(const Key& key) {
  if (is_empty())
    return;
  Node dummy_node(C::kNoKey, C::kNoValue);
  // Create a dummy node.  The use of the dummy node is a bit
  // counter-intuitive: The right child of the dummy node will hold
  // the L tree of the algorithm.  The left child of the dummy node
  // will hold the R tree of the algorithm.  Using a dummy node, left
  // and right will always be nodes and we avoid special cases.
  Node* dummy = &dummy_node;
  Node* left = dummy;
  Node* right = dummy;
  Node* current = root_;
  while (true) {
    int cmp = C::Compare(key, current->key_);
    if (cmp < 0) {
      // The key sorts before the current node: continue to the left.
      if (current->left_ == NULL)
        break;
      if (C::Compare(key, current->left_->key_) < 0) {
        // Rotate right.
        Node* temp = current->left_;
        current->left_ = temp->right_;
        temp->right_ = current;
        current = temp;
        if (current->left_ == NULL)
          break;
      }
      // Link right.
      right->left_ = current;
      right = current;
      current = current->left_;
    } else if (cmp > 0) {
      // The key sorts after the current node: continue to the right.
      if (current->right_ == NULL)
        break;
      if (C::Compare(key, current->right_->key_) > 0) {
        // Rotate left.
        Node* temp = current->right_;
        current->right_ = temp->left_;
        temp->left_ = current;
        current = temp;
        if (current->right_ == NULL)
          break;
      }
      // Link left.
      left->right_ = current;
      left = current;
      current = current->right_;
    } else {
      // Exact match: current is the node to move to the root.
      break;
    }
  }
  // Assemble.  Hand current's subtrees over to the ends of the two side
  // trees, then hang the side trees (collected under the dummy node) off
  // current and make it the root.
  left->right_ = current->left_;
  right->left_ = current->right_;
  current->left_ = dummy->right_;
  current->right_ = dummy->left_;
  root_ = current;
}
// Visits the subtree rooted at |node| in order (left subtree, the node
// itself, right subtree), invoking callback->Call(key, value) once per
// node.  A NULL |node| is an empty subtree.
template <typename Node, class Callback>
static void DoForEach(Node* node, Callback* callback) {
  // Recurse into left subtrees only; the walk down the right spine is
  // done by the loop.
  for (; node != NULL; node = node->right()) {
    DoForEach<Node, Callback>(node->left(), callback);
    callback->Call(node->key(), node->value());
  }
}
// Binds this node's label through the given macro assembler.
void RegExpNode::Bind(RegExpMacroAssembler* macro) {
  Label* own_label = &label_;
  macro->Bind(own_label);
}
} // namespace internal
} // namespace v8
#endif // V8_JSREGEXP_INL_H_

File diff suppressed because it is too large Load Diff

View File

@ -30,6 +30,10 @@
namespace v8 { namespace internal {
class RegExpMacroAssembler;
class RegExpImpl {
public:
// Creates a regular expression literal in the old space.
@ -61,10 +65,28 @@ class RegExpImpl {
static Handle<Object> ExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject);
// Stores an uncompiled RegExp pattern in the JSRegExp object.
// It will be compiled by JSCRE when first executed.
static Handle<Object> JscrePrepare(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags);
// Stores a compiled RegExp pattern in the JSRegExp object.
// The pattern is compiled by Irregexp.
static Handle<Object> IrregexpPrepare(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags,
Handle<FixedArray> irregexp_data);
// Compile the pattern using JSCRE and store the result in the
// JSRegExp object.
static Handle<Object> JscreCompile(Handle<JSRegExp> re);
static Handle<Object> AtomCompile(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags);
JSRegExp::Flags flags,
Handle<String> match_pattern);
static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<Object> index);
@ -72,47 +94,78 @@ class RegExpImpl {
static Handle<Object> AtomExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject);
static Handle<Object> JsreCompile(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags);
static Handle<Object> JscreCompile(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags);
static Handle<Object> JsreExec(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<Object> index);
// Execute a compiled JSCRE pattern.
static Handle<Object> JscreExec(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<Object> index);
static Handle<Object> JsreExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject);
// Execute an Irregexp bytecode pattern.
static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<Object> index);
static Handle<Object> JscreExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject);
static Handle<Object> IrregexpExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject);
static void NewSpaceCollectionPrologue();
static void OldSpaceCollectionPrologue();
private:
// Converts a source string to a 16 bit flat string. The string
// will be either sequential or it will be a SlicedString backed
// by a flat string.
static Handle<String> StringToTwoByte(Handle<String> pattern);
static Handle<String> CachedStringToTwoByte(Handle<String> pattern);
static const int kIrregexpImplementationIndex = 0;
static const int kIrregexpNumberOfCapturesIndex = 1;
static const int kIrregexpNumberOfRegistersIndex = 2;
static const int kIrregexpCodeIndex = 3;
static const int kIrregexpDataLength = 4;
static const int kJscreNumberOfCapturesIndex = 0;
static const int kJscreInternalIndex = 1;
static const int kJscreDataLength = 2;
private:
static String* last_ascii_string_;
static String* two_byte_cached_string_;
// Returns the capture count from the re.
static int JsreCapture(Handle<JSRegExp> re);
static ByteArray* JsreInternal(Handle<JSRegExp> re);
static int JscreNumberOfCaptures(Handle<JSRegExp> re);
static ByteArray* JscreInternal(Handle<JSRegExp> re);
static int IrregexpNumberOfCaptures(Handle<JSRegExp> re);
static int IrregexpNumberOfRegisters(Handle<JSRegExp> re);
static Handle<ByteArray> IrregexpCode(Handle<JSRegExp> re);
// Call jsRegExpExecute once
static Handle<Object> JsreExecOnce(Handle<JSRegExp> regexp,
int num_captures,
Handle<String> subject,
int previous_index,
const uc16* utf8_subject,
int* ovector,
int ovector_length);
static Handle<Object> JscreExecOnce(Handle<JSRegExp> regexp,
int num_captures,
Handle<String> subject,
int previous_index,
const uc16* utf8_subject,
int* ovector,
int ovector_length);
static Handle<Object> IrregexpExecOnce(Handle<JSRegExp> regexp,
int num_captures,
Handle<String> subject16,
int previous_index,
int* ovector,
int ovector_length);
// Set the subject cache. The previous string buffer is not deleted, so the
// caller should ensure that it doesn't leak.
static void SetSubjectCache(String* subject, char* utf8_subject,
int uft8_length, int character_position,
static void SetSubjectCache(String* subject,
char* utf8_subject,
int uft8_length,
int character_position,
int utf8_position);
// A one element cache of the last utf8_subject string and its length. The
@ -125,6 +178,599 @@ class RegExpImpl {
};
// A range of 16-bit characters described by its two end points, from()
// and to().  Contains() treats both end points as part of the range.
class CharacterRange {
 public:
  CharacterRange() : from_(0), to_(0) { }
  // For compatibility with the CHECK_OK macro.  Only NULL may be passed.
  // The members are initialized explicitly so that a range built this way
  // never carries indeterminate end points.
  CharacterRange(void* null) : from_(0), to_(0) {  //NOLINT
    ASSERT_EQ(NULL, null);
  }
  CharacterRange(uc16 from, uc16 to)
      : from_(from),
        to_(to) {
  }
  // Appends ranges for the class escape identified by |type| to |ranges|.
  static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges);
  // A range holding exactly one character.
  static inline CharacterRange Singleton(uc16 value) {
    return CharacterRange(value, value);
  }
  // A range from |from| to |to|; requires from <= to.
  static inline CharacterRange Range(uc16 from, uc16 to) {
    ASSERT(from <= to);
    return CharacterRange(from, to);
  }
  // The range covering every 16-bit character value.
  static inline CharacterRange Everything() {
    return CharacterRange(0, 0xFFFF);
  }
  // True if |i| lies between from() and to(), end points included.
  bool Contains(uc16 i) { return from_ <= i && i <= to_; }
  uc16 from() const { return from_; }
  void set_from(uc16 value) { from_ = value; }
  uc16 to() const { return to_; }
  void set_to(uc16 value) { to_ = value; }
  // The setters do not enforce from <= to; this reports whether it holds.
  bool is_valid() { return from_ <= to_; }
  bool IsSingleton() { return (from_ == to_); }
  void AddCaseEquivalents(ZoneList<CharacterRange>* ranges);
  static const int kRangeCanonicalizeMax = 0x200;
  static const int kStartMarker = (1 << 24);
  static const int kPayloadMask = (1 << 24) - 1;
 private:
  uc16 from_;
  uc16 to_;
};
// Forward declaration of the helper used by ZoneSplayTree::ForEach.  It is
// called with a tree's root node and a callback object.
template <typename Node, class Callback>
static void DoForEach(Node* node, Callback* callback);
// A zone splay tree.  The config type parameter encapsulates the
// different configurations of a concrete splay tree:
//
//   typedef Key: the key type
//   typedef Value: the value type
//   static const kNoKey: the dummy key used when no key is set
//   static const kNoValue: the dummy value used to initialize nodes
//   int (Compare)(Key& a, Key& b) -> {-1, 0, 1}: comparison function
//
template <typename Config>
class ZoneSplayTree : public ZoneObject {
 public:
  typedef typename Config::Key Key;
  typedef typename Config::Value Value;
  class Locator;
  // Creates an empty tree.
  ZoneSplayTree() : root_(NULL) { }
  // Inserts the given key in this tree.  Returns true if a node was
  // inserted, otherwise false (the key was already present).  In both
  // cases the locator is enabled and provides access to the mapping for
  // the key.
  bool Insert(const Key& key, Locator* locator);
  // Looks up the key in this tree and returns true if it was found,
  // otherwise false.  If the node is found the locator is enabled and
  // provides access to the mapping for the key.
  bool Find(const Key& key, Locator* locator);
  // Finds the mapping with the greatest key less than or equal to the
  // given key.
  bool FindGreatestLessThan(const Key& key, Locator* locator);
  // Find the mapping with the greatest key in this tree.
  bool FindGreatest(Locator* locator);
  // Finds the mapping with the least key greater than or equal to the
  // given key.
  bool FindLeastGreaterThan(const Key& key, Locator* locator);
  // Find the mapping with the least key in this tree.
  bool FindLeast(Locator* locator);
  // Remove the node with the given key from the tree.
  bool Remove(const Key& key);
  bool is_empty() { return root_ == NULL; }
  // Perform the splay operation for the given key. Moves the node with
  // the given key to the top of the tree.  If no node has the given
  // key, the last node on the search path is moved to the top of the
  // tree.
  void Splay(const Key& key);
  // A single tree node: one key/value mapping plus its two child links.
  class Node : public ZoneObject {
   public:
    // Creates a node without children.
    Node(const Key& key, const Value& value)
        : key_(key),
          value_(value),
          left_(NULL),
          right_(NULL) { }
    // Note that key() and value() return copies, not references.
    Key key() { return key_; }
    Value value() { return value_; }
    Node* left() { return left_; }
    Node* right() { return right_; }
   private:
    friend class ZoneSplayTree;
    friend class Locator;
    Key key_;
    Value value_;
    Node* left_;
    Node* right_;
  };
  // A locator provides access to a node in the tree without actually
  // exposing the node.
  class Locator {
   public:
    explicit Locator(Node* node) : node_(node) { }
    // Creates an unbound locator; key(), value() and set_value() must not
    // be used until bind() has been given a node.
    Locator() : node_(NULL) { }
    const Key& key() { return node_->key_; }
    Value& value() { return node_->value_; }
    void set_value(const Value& value) { node_->value_ = value; }
    inline void bind(Node* node) { node_ = node; }
   private:
    Node* node_;
  };
  // Hands the root of the tree and the callback |c| to DoForEach.
  template <class Callback>
  void ForEach(Callback* c) {
    DoForEach<typename ZoneSplayTree<Config>::Node, Callback>(root_, c);
  }
 private:
  Node* root_;
};
// A set of unsigned integers that behaves especially well on small
// integers (< 32).  May do zone-allocation.
class OutSet: public ZoneObject {
 public:
  // Creates an empty set.
  OutSet() : first_(0), remaining_(NULL), successors_(NULL) { }
  // Returns a set holding this set's values plus |value|; see the notes
  // on Set() and successors() below for how instances are shared.
  OutSet* Extend(unsigned value);
  // Membership test for |value|.
  bool Get(unsigned value);
  // NOTE(review): presumably values below kFirstLimit are kept in first_
  // and all others in remaining_ -- confirm against the implementation.
  static const unsigned kFirstLimit = 32;
 private:
  // Destructively set a value in this set.  In most cases you want
  // to use Extend instead to ensure that only one instance exists
  // that contains the same values.
  void Set(unsigned value);
  // The successors are a list of sets that contain the same values
  // as this set and the one more value that is not present in this
  // set.
  ZoneList<OutSet*>* successors() { return successors_; }
  OutSet(uint32_t first, ZoneList<unsigned>* remaining)
      : first_(first), remaining_(remaining), successors_(NULL) { }
  uint32_t first_;
  ZoneList<unsigned>* remaining_;
  ZoneList<OutSet*>* successors_;
};
// A mapping from integers, specified as ranges, to a set of integers.
// Used for mapping character ranges to choices.
class DispatchTable {
public:
class Entry {
public:
Entry()
: from_(0), to_(0), out_set_(NULL) { }
Entry(uc16 from, uc16 to, OutSet* out_set)
: from_(from), to_(to), out_set_(out_set) { }
uc16 from() { return from_; }
uc16 to() { return to_; }
void set_to(uc16 value) { to_ = value; }
void AddValue(int value) { out_set_ = out_set_->Extend(value); }
OutSet* out_set() { return out_set_; }
private:
uc16 from_;
uc16 to_;
OutSet* out_set_;
};
class Config {
public:
typedef uc16 Key;
typedef Entry Value;
static const uc16 kNoKey;
static const Entry kNoValue;
static inline int Compare(uc16 a, uc16 b) {
if (a == b)
return 0;
else if (a < b)
return -1;
else
return 1;
}
};
void AddRange(CharacterRange range, int value);
OutSet* Get(uc16 value);
void Dump();
template <typename Callback>
void ForEach(Callback* callback) { return tree()->ForEach(callback); }
private:
// There can't be a static empty set since it allocates its
// successors in a zone and caches them.
OutSet* empty() { return &empty_; }
OutSet empty_;
ZoneSplayTree<Config>* tree() { return &tree_; }
ZoneSplayTree<Config> tree_;
};
// Applies VISIT to the name prefix of every concrete regexp node class
// (EndNode, ActionNode, ChoiceNode, BackReferenceNode, TextNode).
#define FOR_EACH_NODE_TYPE(VISIT)                                    \
  VISIT(End)                                                         \
  VISIT(Action)                                                      \
  VISIT(Choice)                                                      \
  VISIT(BackReference)                                               \
  VISIT(Text)
// Applies VISIT to the name suffix of every RegExp tree class
// (RegExpDisjunction, RegExpAlternative, ...).
#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT)                            \
  VISIT(Disjunction)                                                 \
  VISIT(Alternative)                                                 \
  VISIT(Assertion)                                                   \
  VISIT(CharacterClass)                                              \
  VISIT(Atom)                                                        \
  VISIT(Quantifier)                                                  \
  VISIT(Capture)                                                     \
  VISIT(Lookahead)                                                   \
  VISIT(BackReference)                                               \
  VISIT(Empty)                                                       \
  VISIT(Text)
// Forward-declare one RegExp<Name> class per tree type.
#define FORWARD_DECLARE(Name) class RegExp##Name;
FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
#undef FORWARD_DECLARE
// A tagged union holding either a RegExpAtom or a RegExpCharacterClass.
// |type| says which member of |data| is meaningful.
class TextElement {
 public:
  enum Type {UNINITIALIZED, ATOM, CHAR_CLASS};
  // Creates an element tagged UNINITIALIZED; |data| is not set.
  TextElement() : type(UNINITIALIZED) { }
  // Sets the tag only; |data| is not set.
  explicit TextElement(Type t) : type(t) { }
  // Factory functions taking the atom / character class for the element.
  static TextElement Atom(RegExpAtom* atom);
  static TextElement CharClass(RegExpCharacterClass* char_class);
  Type type;
  union {
    RegExpAtom* u_atom;
    RegExpCharacterClass* u_char_class;
  } data;
};
// Per-node analysis flags, each stored in a single bit.  All flags start
// out false.
struct NodeInfo {
  NodeInfo() {
    being_analyzed = false;
    been_analyzed = false;
    determine_word = false;
    determine_newline = false;
    determine_start = false;
    follows_word_interest = false;
    follows_newline_interest = false;
    follows_start_interest = false;
  }

  // True if this and |that| agree on all three follows_*_interest flags.
  bool SameInterests(NodeInfo* that) {
    if (follows_word_interest != that->follows_word_interest) {
      return false;
    }
    if (follows_newline_interest != that->follows_newline_interest) {
      return false;
    }
    return follows_start_interest == that->follows_start_interest;
  }

  // Copies the three follows_*_interest flags from |that|; every other
  // flag is left alone.
  void AdoptInterests(NodeInfo* that) {
    follows_start_interest = that->follows_start_interest;
    follows_newline_interest = that->follows_newline_interest;
    follows_word_interest = that->follows_word_interest;
  }

  // Each prev_determine_* query is the OR of the matching determine_*
  // and follows_*_interest flags.
  bool prev_determine_word() {
    return follows_word_interest || determine_word;
  }
  bool prev_determine_newline() {
    return follows_newline_interest || determine_newline;
  }
  bool prev_determine_start() {
    return follows_start_interest || determine_start;
  }

  bool being_analyzed: 1;
  bool been_analyzed: 1;
  bool determine_word: 1;
  bool determine_newline: 1;
  bool determine_start: 1;
  bool follows_word_interest: 1;
  bool follows_newline_interest: 1;
  bool follows_start_interest: 1;
};
STATIC_CHECK(sizeof(NodeInfo) <= sizeof(int)); // NOLINT
// A lazily allocated list of RegExpNode pointers.  No backing list exists
// until Ensure() has been called; Add() and Get() require that it exists.
class SiblingList {
 public:
  SiblingList() : list_(NULL) { }

  // Number of entries; 0 while the list has not been allocated.
  int length() {
    if (list_ == NULL) return 0;
    return list_->length();
  }

  // Allocates the list on first use and seeds it with |parent|.  Later
  // calls have no effect.
  void Ensure(RegExpNode* parent) {
    if (list_ != NULL) return;
    list_ = new ZoneList<RegExpNode*>(2);
    list_->Add(parent);
  }

  void Add(RegExpNode* node) { list_->Add(node); }
  RegExpNode* Get(int index) { return list_->at(index); }

 private:
  ZoneList<RegExpNode*>* list_;
};
// Abstract base class of the regexp node classes declared below.  Each
// node owns a label, a NodeInfo and a list of sibling nodes.
class RegExpNode: public ZoneObject {
 public:
  virtual ~RegExpNode() { }
  // Dispatches to the visitor method matching the concrete node type.
  virtual void Accept(NodeVisitor* visitor) = 0;
  // Generates a goto to this node or actually generates the code at this point.
  // Until the implementation is complete we will return true for success and
  // false for failure.
  virtual bool GoTo(RegExpCompiler* compiler);
  Label* label();
  // Until the implementation is complete we will return true for success and
  // false for failure.
  virtual bool Emit(RegExpCompiler* compiler) = 0;
  virtual RegExpNode* PropagateInterest(NodeInfo* info) = 0;
  NodeInfo* info() { return &info_; }
  // Overridden by EndNode; every other node reports false.
  virtual bool IsBacktrack() { return false; }
  RegExpNode* GetSibling(NodeInfo* info);
  // Makes sure the sibling list exists; when it is created this node is
  // added as its first entry.
  void EnsureSiblings() { siblings_.Ensure(this); }
  // Requires a prior call to EnsureSiblings().
  void AddSibling(RegExpNode* node) { siblings_.Add(node); }
 protected:
  inline void Bind(RegExpMacroAssembler* macro);
 private:
  Label label_;
  NodeInfo info_;
  SiblingList siblings_;
};
// Base class for nodes that have a single on_success successor node.
class SeqRegExpNode: public RegExpNode {
 public:
  explicit SeqRegExpNode(RegExpNode* on_success)
      : on_success_(on_success) { }
  RegExpNode* on_success() { return on_success_; }
  void set_on_success(RegExpNode* node) { on_success_ = node; }
  // Default implementation reports failure; the subclasses in this file
  // declare their own Emit.
  virtual bool Emit(RegExpCompiler* compiler) { return false; }
 private:
  RegExpNode* on_success_;
};
// A sequence node carrying one of the actions listed in Type together
// with the register operand(s) of that action.  Instances are created
// through the static factory functions; the constructor is private.
class ActionNode: public SeqRegExpNode {
 public:
  enum Type {
    STORE_REGISTER,
    INCREMENT_REGISTER,
    STORE_POSITION,
    SAVE_POSITION,
    RESTORE_POSITION,
    BEGIN_SUBMATCH,
    ESCAPE_SUBMATCH
  };
  // One factory per action type; |on_success| is the successor node.
  static ActionNode* StoreRegister(int reg, int val, RegExpNode* on_success);
  static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
  static ActionNode* StorePosition(int reg, RegExpNode* on_success);
  static ActionNode* SavePosition(int reg, RegExpNode* on_success);
  static ActionNode* RestorePosition(int reg, RegExpNode* on_success);
  static ActionNode* BeginSubmatch(int reg, RegExpNode* on_success);
  static ActionNode* EscapeSubmatch(int reg, RegExpNode* on_success);
  virtual void Accept(NodeVisitor* visitor);
  virtual bool Emit(RegExpCompiler* compiler);
  virtual RegExpNode* PropagateInterest(NodeInfo* info);
 private:
  // Operands of the action.  NOTE(review): presumably type_ selects the
  // active member -- confirm in the factory implementations.
  union {
    struct {
      int reg;
      int value;
    } u_store_register;
    struct {
      int reg;
    } u_increment_register;
    struct {
      int reg;
    } u_position_register;
    struct {
      int reg;
    } u_submatch_stack_pointer_register;
  } data_;
  // Sets the type and successor only; data_ is left uninitialized.
  ActionNode(Type type, RegExpNode* on_success)
      : SeqRegExpNode(on_success),
        type_(type) { }
  Type type_;
  friend class DotPrinter;
};
// A sequence node holding a list of text elements, with separate
// successor nodes for success and failure.
class TextNode: public SeqRegExpNode {
 public:
  TextNode(ZoneList<TextElement>* elms,
           RegExpNode* on_success,
           RegExpNode* on_failure)
      : SeqRegExpNode(on_success),
        on_failure_(on_failure),
        elms_(elms) { }
  virtual void Accept(NodeVisitor* visitor);
  virtual RegExpNode* PropagateInterest(NodeInfo* info);
  RegExpNode* on_failure() { return on_failure_; }
  virtual bool Emit(RegExpCompiler* compiler);
  // The element list passed to the constructor (not a copy).
  ZoneList<TextElement>* elements() { return elms_; }
 private:
  RegExpNode* on_failure_;
  ZoneList<TextElement>* elms_;
};
// A sequence node for a back reference, identified by a start and an end
// register, with separate successor nodes for success and failure.
class BackReferenceNode: public SeqRegExpNode {
 public:
  BackReferenceNode(int start_reg,
                    int end_reg,
                    RegExpNode* on_success,
                    RegExpNode* on_failure)
      : SeqRegExpNode(on_success),
        on_failure_(on_failure),
        start_reg_(start_reg),
        end_reg_(end_reg) { }
  virtual void Accept(NodeVisitor* visitor);
  RegExpNode* on_failure() { return on_failure_; }
  int start_register() { return start_reg_; }
  int end_register() { return end_reg_; }
  virtual bool Emit(RegExpCompiler* compiler);
  virtual RegExpNode* PropagateInterest(NodeInfo* info);
 private:
  RegExpNode* on_failure_;
  int start_reg_;
  int end_reg_;
};
// A node without successors whose action is either ACCEPT or BACKTRACK.
class EndNode: public RegExpNode {
 public:
  enum Action { ACCEPT, BACKTRACK };
  explicit EndNode(Action action) : action_(action) { }
  virtual void Accept(NodeVisitor* visitor);
  virtual bool Emit(RegExpCompiler* compiler);
  virtual RegExpNode* PropagateInterest(NodeInfo* info);
  // True exactly when this node was created with the BACKTRACK action.
  virtual bool IsBacktrack() { return action_ == BACKTRACK; }
  virtual bool GoTo(RegExpCompiler* compiler);
 private:
  Action action_;
};
// A condition on a register: a register number, a relation (LT or GEQ)
// and the value to compare against.  A plain value holder; all three
// fields are fixed at construction.
class Guard: public ZoneObject {
 public:
  enum Relation { LT, GEQ };
  Guard(int reg, Relation op, int value)
      : reg_(reg),
        op_(op),
        value_(value) { }
  int reg() { return reg_; }
  Relation op() { return op_; }
  int value() { return value_; }
 private:
  int reg_;
  Relation op_;
  int value_;
};
// A node paired with an optional list of guards.
class GuardedAlternative {
 public:
  // Starts out without a guard list; guards() returns NULL until one
  // exists.
  explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { }
  void AddGuard(Guard* guard);
  RegExpNode* node() { return node_; }
  void set_node(RegExpNode* node) { node_ = node; }
  ZoneList<Guard*>* guards() { return guards_; }
 private:
  RegExpNode* node_;
  ZoneList<Guard*>* guards_;
};
// A node holding a list of guarded alternatives, an on_failure node and a
// dispatch table, together with two flags tracking the table's
// calculation state.
class ChoiceNode: public RegExpNode {
 public:
  // |expected_size| is the initial capacity of the alternatives list.
  explicit ChoiceNode(int expected_size, RegExpNode* on_failure)
      : on_failure_(on_failure),
        alternatives_(new ZoneList<GuardedAlternative>(expected_size)),
        table_calculated_(false),
        being_calculated_(false) { }
  virtual void Accept(NodeVisitor* visitor);
  // Appends a copy of |node| to the list of alternatives.
  void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); }
  ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
  DispatchTable* table() { return &table_; }
  RegExpNode* on_failure() { return on_failure_; }
  virtual bool Emit(RegExpCompiler* compiler);
  virtual RegExpNode* PropagateInterest(NodeInfo* info);
  // Both flags start out false and are only changed through the setters.
  bool table_calculated() { return table_calculated_; }
  void set_table_calculated(bool b) { table_calculated_ = b; }
  bool being_calculated() { return being_calculated_; }
  void set_being_calculated(bool b) { being_calculated_ = b; }
 private:
  void GenerateGuard(RegExpMacroAssembler* macro_assembler,
                     Guard *guard,
                     Label* on_failure);
  RegExpNode* on_failure_;
  ZoneList<GuardedAlternative>* alternatives_;
  DispatchTable table_;
  bool table_calculated_;
  bool being_calculated_;
};
// Abstract visitor over the node classes named by FOR_EACH_NODE_TYPE.
// It declares one pure virtual Visit<Type>(<Type>Node*) per node type.
class NodeVisitor {
 public:
  virtual ~NodeVisitor() { }
#define DECLARE_VISIT(Type)                                          \
  virtual void Visit##Type(Type##Node* that) = 0;
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
#undef DECLARE_VISIT
};
// Node visitor used to add the start set of the alternatives to the
// dispatch table of a choice node.
class DispatchTableConstructor: public NodeVisitor {
 public:
  // The choice index starts at -1 until set_choice_index() is called.
  explicit DispatchTableConstructor(DispatchTable* table)
      : table_(table),
        choice_index_(-1) { }
  void BuildTable(ChoiceNode* node);
  // Records |range| in the table under the current choice index.
  void AddRange(CharacterRange range) {
    table()->AddRange(range, choice_index_);
  }
  void AddInverse(ZoneList<CharacterRange>* ranges);
  // One Visit<Type> override per node type.
#define DECLARE_VISIT(Type)                                          \
  virtual void Visit##Type(Type##Node* that);
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
#undef DECLARE_VISIT
  DispatchTable* table() { return table_; }
  void set_choice_index(int value) { choice_index_ = value; }
 protected:
  DispatchTable *table_;
  int choice_index_;
};
// Node visitor that analyzes nodes.  EnsureAnalyzed() is the entry point
// for a given node.  NOTE(review): presumably it uses the being_analyzed /
// been_analyzed flags of NodeInfo to visit each node once -- confirm in
// the implementation.
class Analysis: public NodeVisitor {
 public:
  void EnsureAnalyzed(RegExpNode* node);
  // One Visit<Type> override per node type.
#define DECLARE_VISIT(Type)                                          \
  virtual void Visit##Type(Type##Node* that);
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
#undef DECLARE_VISIT
};
// Plain result record for regexp parsing; it is the output parameter of
// ParseRegExp and the input of RegExpEngine::Compile.
// NOTE(review): presumably |error| is only meaningful when parsing
// failed -- confirm against the parser.
struct RegExpParseResult {
  RegExpTree* tree;
  bool has_character_escapes;
  Handle<String> error;
  int capture_count;
};
// Static-only entry points of the regexp compiler.
class RegExpEngine: public AllStatic {
 public:
  // Compiles the parse result |input| and returns a FixedArray handle.
  // |node_return| is an out-parameter for a RegExpNode.
  // NOTE(review): presumably it receives the root of the generated node
  // graph -- confirm in the implementation.
  static Handle<FixedArray> Compile(RegExpParseResult* input,
                                    RegExpNode** node_return,
                                    bool ignore_case);
  static void DotPrint(const char* label, RegExpNode* node);
};
} } // namespace v8::internal
#endif // V8_JSREGEXP_H_

View File

@ -89,12 +89,19 @@ void List<T, P>::Iterate(void (*callback)(T* x)) {
}
// Linear search: returns true if some element of the list compares equal
// (operator==) to |elm|, false otherwise.
template<typename T, class P>
bool List<T, P>::Contains(const T& elm) {
  int i = 0;
  while (i < length_) {
    if (data_[i] == elm) return true;
    i++;
  }
  return false;
}
template<typename T, class P>
void List<T, P>::Sort(int (*cmp)(const T* x, const T* y)) {
qsort(data_,
length_,
sizeof(T),
reinterpret_cast<int (*)(const void*, const void*)>(cmp));
ToVector().Sort(cmp);
#ifdef DEBUG
for (int i = 1; i < length_; i++)
ASSERT(cmp(&data_[i - 1], &data_[i]) <= 0);
@ -102,6 +109,12 @@ void List<T, P>::Sort(int (*cmp)(const T* x, const T* y)) {
}
// Sorts the list using PointerSpaceship<T> as the comparison function.
template<typename T, class P>
void List<T, P>::Sort() {
  Sort(PointerSpaceship<T>);
}
template<typename T, class P>
void List<T, P>::Initialize(int capacity) {
ASSERT(capacity >= 0);

View File

@ -46,6 +46,7 @@ namespace v8 { namespace internal {
template <typename T, class P>
class List {
public:
INLINE(explicit List(int capacity)) { Initialize(capacity); }
INLINE(~List()) { DeleteData(data_); }
@ -67,6 +68,8 @@ class List {
Vector<T> ToVector() { return Vector<T>(data_, length_); }
Vector<const T> ToConstVector() { return Vector<const T>(data_, length_); }
// Adds a copy of the given 'element' to the end of the list,
// expanding the list if necessary.
T& Add(const T& element);
@ -92,11 +95,14 @@ class List {
// Drops all but the first 'pos' elements from the list.
INLINE(void Rewind(int pos));
bool Contains(const T& elm);
// Iterate through all list entries, starting at index 0.
void Iterate(void (*callback)(T* x));
// Sort all list entries (using QuickSort)
void Sort(int (*cmp)(const T* x, const T* y));
void Sort();
INLINE(void Initialize(int capacity));

View File

@ -670,7 +670,14 @@ void JSRegExp::JSRegExpVerify() {
}
case JSRegExp::JSCRE: {
FixedArray* arr = FixedArray::cast(data());
ASSERT(arr->get(JSRegExp::kJscreDataIndex)->IsFixedArray());
Object* jscre_data = arr->get(JSRegExp::kJscreDataIndex);
ASSERT(jscre_data->IsFixedArray() || jscre_data->IsUndefined());
break;
}
case JSRegExp::IRREGEXP: {
FixedArray* arr = FixedArray::cast(data());
Object* jscre_data = arr->get(JSRegExp::kJscreDataIndex);
ASSERT(jscre_data->IsFixedArray());
break;
}
default:

View File

@ -279,6 +279,16 @@ bool StringShape::IsExternalTwoByte() {
}
// Returns the character at |index|, reading the backing store as bytes
// for ASCII strings and as uc16 units otherwise.
// NOTE(review): the assertion admits index == length_, one past the last
// character -- confirm that callers rely on this before tightening it.
uc32 FlatStringReader::Get(int index) {
  ASSERT(0 <= index && index <= length_);
  if (!is_ascii_) {
    const uc16* wide_chars = static_cast<const uc16*>(start_);
    return wide_chars[index];
  }
  const byte* narrow_chars = static_cast<const byte*>(start_);
  return narrow_chars[index];
}
// An object is a number if it is either a Smi or a heap number.
bool Object::IsNumber() {
  if (IsSmi()) return true;
  return IsHeapNumber();
}
@ -1142,6 +1152,13 @@ Object* FixedArray::get(int index) {
}
void FixedArray::set(int index, Smi* value) {
ASSERT(reinterpret_cast<Object*>(value)->IsSmi());
int offset = kHeaderSize + index * kPointerSize;
WRITE_FIELD(this, offset, value);
}
void FixedArray::set(int index, Object* value) {
ASSERT(index >= 0 && index < this->length());
int offset = kHeaderSize + index * kPointerSize;
@ -1747,6 +1764,7 @@ Code::Flags Code::flags() {
void Code::set_flags(Code::Flags flags) {
STATIC_ASSERT(Code::NUMBER_OF_KINDS <= (kFlagsKindMask >> kFlagsKindShift)+1);
// Make sure that all call stubs have an arguments count.
ASSERT(ExtractKindFromFlags(flags) != CALL_IC ||
ExtractArgumentsCountFromFlags(flags) >= 0);
@ -2213,6 +2231,22 @@ JSRegExp::Type JSRegExp::TypeTag() {
}
// Returns the flags stored as a Smi at kFlagsIndex of the data array.
// The data field must hold a FixedArray.
JSRegExp::Flags JSRegExp::GetFlags() {
  ASSERT(this->data()->IsFixedArray());
  FixedArray* fields = FixedArray::cast(this->data());
  Smi* encoded_flags = Smi::cast(fields->get(kFlagsIndex));
  return Flags(encoded_flags->value());
}
// Returns the source pattern stored at kSourceIndex of the data array.
// The data field must hold a FixedArray.
String* JSRegExp::Pattern() {
  ASSERT(this->data()->IsFixedArray());
  FixedArray* fields = FixedArray::cast(this->data());
  return String::cast(fields->get(kSourceIndex));
}
Object* JSRegExp::DataAt(int index) {
ASSERT(TypeTag() != NOT_COMPILED);
return FixedArray::cast(data())->get(index);

View File

@ -3501,6 +3501,57 @@ const unibrow::byte* String::ReadBlock(String* input,
}
// Head of the linked list of live readers (chained through prev_); NULL
// when no reader exists.
FlatStringReader* FlatStringReader::top_ = NULL;
// Creates a reader over the string in |str|.  The reader keeps the
// handle's location, links itself in as the new top of the reader list,
// and then RefreshState() fills in is_ascii_ and start_.
FlatStringReader::FlatStringReader(Handle<String> str)
    : str_(str.location()),
      length_(str->length()),
      prev_(top_) {
  top_ = this;
  RefreshState();
}
// Creates a reader over a raw character vector, treated as ASCII.  str_
// is NULL for such a reader, so RefreshState() does nothing for it.  The
// reader links itself in as the new top of the reader list.
FlatStringReader::FlatStringReader(Vector<const char> input)
    : str_(NULL),
      is_ascii_(true),
      length_(input.length()),
      start_(input.start()),
      prev_(top_) {
  top_ = this;
}
// Unlinks the reader from the reader list.  Readers must be destroyed in
// reverse order of construction: the one being destroyed has to be the
// current top.
FlatStringReader::~FlatStringReader() {
  ASSERT_EQ(top_, this);
  top_ = prev_;
}
void FlatStringReader::RefreshState() {
if (str_ == NULL) return;
Handle<String> str(str_);
StringShape shape(*str);
ASSERT(str->IsFlat(shape));
is_ascii_ = shape.IsAsciiRepresentation();
if (is_ascii_) {
start_ = str->ToAsciiVector().start();
} else {
start_ = str->ToUC16Vector().start();
}
}
void FlatStringReader::PostGarbageCollectionProcessing() {
FlatStringReader* current = top_;
while (current != NULL) {
current->RefreshState();
current = current->prev_;
}
}
// Repositions the buffer by resetting it to |pos| on the current input.
void StringInputBuffer::Seek(unsigned pos) {
  Reset(pos, input_);
}

View File

@ -1498,9 +1498,12 @@ class FixedArray: public Array {
// Setter and getter for elements.
inline Object* get(int index);
// Setter that uses write barrier.
inline void set(int index, Object* value);
// Setter with barrier mode.
// Setter that doesn't need a write barrier.
inline void set(int index, Smi* value);
// Setter with explicit barrier mode.
inline void set(int index, Object* value, WriteBarrierMode mode);
// Setters for frequently used oddballs located in old space.
@ -2114,14 +2117,17 @@ class Code: public HeapObject {
CALL_IC,
STORE_IC,
KEYED_STORE_IC,
// No more than eight kinds. The value currently encoded in three bits in
// Flags.
// Pseudo-kinds.
REGEXP = BUILTIN,
FIRST_IC_KIND = LOAD_IC,
LAST_IC_KIND = KEYED_STORE_IC
};
enum {
NUMBER_OF_KINDS = LAST_IC_KIND + 1
NUMBER_OF_KINDS = KEYED_STORE_IC + 1
};
// A state indicates that inline cache in this Code object contains
@ -2272,7 +2278,6 @@ class Code: public HeapObject {
static const int kFlagsTypeMask = 0x000001C0; // 111000000
static const int kFlagsArgumentsCountMask = 0xFFFFFE00;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Code);
};
@ -2912,7 +2917,13 @@ class JSValue: public JSObject {
// Regular expressions
class JSRegExp: public JSObject {
public:
enum Type { NOT_COMPILED, JSCRE, ATOM };
// Meaning of Type:
// NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet.
// JSCRE: A complex RegExp for JSCRE
// ATOM: A simple string to match against using an indexOf operation.
// IRREGEXP: Compiled with Irregexp.
// IRREGEXP_NATIVE: Compiled to native code with Irregexp.
enum Type { NOT_COMPILED, JSCRE, ATOM, IRREGEXP, IRREGEXP_NATIVE };
enum Flag { NONE = 0, GLOBAL = 1, IGNORE_CASE = 2, MULTILINE = 4 };
class Flags {
@ -2929,6 +2940,8 @@ class JSRegExp: public JSObject {
DECL_ACCESSORS(data, Object)
inline Type TypeTag();
inline Flags GetFlags();
inline String* Pattern();
inline Object* DataAt(int index);
static inline JSRegExp* cast(Object* obj);
@ -2945,10 +2958,11 @@ class JSRegExp: public JSObject {
static const int kTagIndex = 0;
static const int kSourceIndex = kTagIndex + 1;
static const int kFlagsIndex = kSourceIndex + 1;
// These two are the same since the same entry is shared for
// These three are the same since the same entry is shared for
// different purposes in different types of regexps.
static const int kAtomPatternIndex = kFlagsIndex + 1;
static const int kJscreDataIndex = kFlagsIndex + 1;
static const int kIrregexpDataIndex = kFlagsIndex + 1;
static const int kDataSize = kAtomPatternIndex + 1;
};
@ -3578,6 +3592,28 @@ class ExternalTwoByteString: public ExternalString {
};
// A flat string reader provides random access to the contents of a
// string independent of the character width of the string.  The handle
// must be valid as long as the reader is being used.
class FlatStringReader BASE_EMBEDDED {
 public:
  explicit FlatStringReader(Handle<String> str);
  explicit FlatStringReader(Vector<const char> input);
  ~FlatStringReader();
  // Recomputes the cached representation and start address from the
  // underlying string, if there is one.
  void RefreshState();
  inline uc32 Get(int index);
  int length() { return length_; }
  // Refreshes the state of every live reader.
  static void PostGarbageCollectionProcessing();
 private:
  // Location of the string handle; NULL for readers over a raw vector.
  String** str_;
  // Whether start_ points at one-byte characters (else uc16 units).
  bool is_ascii_;
  int length_;
  const void* start_;
  // Live readers form a linked list: top_ is the most recently
  // constructed one and prev_ the one that was top before this reader.
  FlatStringReader* prev_;
  static FlatStringReader* top_;
};
// Note that StringInputBuffers are not valid across a GC! To fix this
// it would have to store a String Handle instead of a String* and
// AsciiStringReadBlock would have to be modified to use memcpy.

File diff suppressed because it is too large Load Diff

View File

@ -145,6 +145,9 @@ ScriptDataImpl* PreParse(unibrow::CharacterStream* stream,
v8::Extension* extension);
// Parses the regular expression read through |input|, reporting through
// |result|.
// NOTE(review): the bool presumably signals success -- confirm in parser.cc.
bool ParseRegExp(FlatStringReader* input, RegExpParseResult* result);
// Support for doing lazy compilation. The script is the script containing full
// source of the script where the function is declared. The start_position and
// end_position specifies the part of the script source which has the source

View File

@ -0,0 +1,605 @@
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <string.h>
#include "v8.h"
#include "log.h"
#include "ast.h"
#include "macro-assembler.h"
#include "regexp-macro-assembler-ia32.h"
namespace v8 { namespace internal {
/*
* This assembler uses the following register assignment convention
* - edx : current character, or kEndOfInput if current position is not
* inside string. The kEndOfInput value is greater than 0xffff,
* so any tests that don't check whether the current position
* is inside the correct range should retain bits above the
* 15th in their computations, and fail if the value is too
* great.
* - edi : current position in input, as negative offset from end of string.
* - esi : end of input (points to byte after last character in input).
* - ebp : points to the location above the registers on the stack,
* as if by the "enter <register_count>" opcode.
* - esp : points to tip of backtracking stack.
*
* The registers eax, ebx and ecx are free to use for computations.
*
* Each call to a public method should retain this convention.
* The stack will have the following structure:
* - int* capture_array (int[num_saved_registers_], for output).
* - end of input (index of end of string, relative to *string_base)
* - start of input (index of first character in string, relative
* to *string_base)
* - void** string_base (location of a handle containing the string)
* - return address
* - backup of esi
* - backup of edi
* ebp-> - old ebp
* - register 0 ebp[-4]
* - register 1 ebp[-8]
* - ...
*
* The data before ebp must be placed there by the calling code, e.g.,
* by calling the code as cast to:
* bool (*match)(String** string_base,
* int start_offset,
* int end_offset,
* int* capture_output_array)
*/
#define __ masm_->
// Allocates the MacroAssembler and the constant-buffer provider and emits
// the first instruction of the generated code: a jump to the entry code,
// which is only bound and written in GetCode().
// num_registers_ and num_saved_registers_ both start at registers_to_save;
// RecordRegister() can raise num_registers_ later.
RegExpMacroAssemblerIA32::RegExpMacroAssemblerIA32(
Mode mode,
int registers_to_save,
bool ignore_case)
: masm_(new MacroAssembler(NULL, kRegExpCodeSize)),
constants_(kRegExpConstantsSize),
mode_(mode),
num_registers_(registers_to_save),
num_saved_registers_(registers_to_save),
ignore_case_(ignore_case),
entry_label_(),
start_label_(),
success_label_(),
exit_label_(),
self_(Heap::undefined_value()) {
__ jmp(&entry_label_); // We'll write the entry code later.
__ bind(&start_label_); // And then continue from here.
}
// Releases the MacroAssembler allocated by the constructor and resets the
// four member labels.
RegExpMacroAssemblerIA32::~RegExpMacroAssemblerIA32() {
delete masm_;
// Unuse labels in case we throw away the assembler without calling GetCode.
entry_label_.Unuse();
start_label_.Unuse();
success_label_.Unuse();
exit_label_.Unuse();
}
// Adds |by| characters (scaled to bytes with char_size()) to the current
// position in edi, then emits a Backtrack() that is jumped over when the
// addition sets the carry flag ("below").
// NOTE(review): edi is a negative offset from the end of input, so the add
// carries exactly when the new position reaches or passes the end. As
// written, the in-range case seems to fall into Backtrack() -- confirm the
// intended condition.
void RegExpMacroAssemblerIA32::AdvanceCurrentPosition(int by) {
ASSERT(by > 0);
Label inside_string;
__ add(Operand(edi), Immediate(by * char_size()));
__ j(below, &inside_string);
Backtrack();
__ bind(&inside_string);
}
// Adds |by| to the stack slot of regexp register |reg|. The register index
// must already be below num_registers_.
void RegExpMacroAssemblerIA32::AdvanceRegister(int reg, int by) {
ASSERT(reg >= 0);
ASSERT(reg < num_registers_);
__ add(register_location(reg), Immediate(by));
}
// Pops the top of the stack into ecx, adds self_ to it and jumps to the
// resulting address.
// NOTE(review): PushBacktrack() pushes the label with RelocInfo::NONE, so the
// popped value is presumably relative to the code object that self_ stands
// for -- confirm.
void RegExpMacroAssemblerIA32::Backtrack() {
__ pop(ecx);
__ add(Operand(ecx), Immediate(self_));
__ jmp(Operand(ecx));
}
// Binds |label| to the current position in the generated code.
void RegExpMacroAssemblerIA32::Bind(Label* label) {
__ bind(label);
}
// Looks up the bit for (current character - start) in a bitmap.
// Unfinished: the |bitmap| argument is not used, and ecx is read as the
// bitmap base address although the instruction that would load it is still
// commented out.
// NOTE(review): the final jump is taken on carry, i.e. when the tested bit is
// set, yet targets |on_zero|, and it bypasses BranchOrBacktrack() so a NULL
// label is not handled -- confirm once the bitmap is wired up.
void RegExpMacroAssemblerIA32::CheckBitmap(uc16 start,
Label* bitmap,
Label* on_zero) {
ReadCurrentChar(eax);
__ sub(Operand(eax), Immediate(start));
__ cmp(eax, 64); // FIXME: 64 = length_of_bitmap_in_bits.
BranchOrBacktrack(greater_equal, on_zero);
// ebx = byte index into the bitmap (bit index / 8).
__ mov(ebx, eax);
__ shr(ebx, 3);
// TODO(lrn): Where is the bitmap stored? Pass the bitmap as argument instead.
// __ mov(ecx, position_of_bitmap);
__ movzx_b(ebx, Operand(ecx, ebx, times_1, 0));
// eax = bit index within the loaded byte.
__ and_(eax, (1<<3)-1);
__ bt(Operand(ebx), eax);
__ j(carry, on_zero);
}
// Compares the current character (edx) with |c| and branches to |on_equal|
// (or backtracks if it is NULL) when they are equal.
void RegExpMacroAssemblerIA32::CheckCharacter(uc16 c, Label* on_equal) {
__ cmp(edx, c);
BranchOrBacktrack(equal, on_equal);
}
// Compares the current character (edx) with |limit| and branches to
// |on_greater| (or backtracks if it is NULL) on the "greater" condition.
void RegExpMacroAssemblerIA32::CheckCharacterGT(uc16 limit, Label* on_greater) {
__ cmp(edx, limit);
BranchOrBacktrack(greater, on_greater);
}
// Compares the current character (edx) with |limit| and branches to
// |on_less| (or backtracks if it is NULL) on the "less" condition.
void RegExpMacroAssemblerIA32::CheckCharacterLT(uc16 limit, Label* on_less) {
__ cmp(edx, limit);
BranchOrBacktrack(less, on_less);
}
// Checks that the str.length() characters starting |cp_offset| characters
// after the current position are equal to |str|.  Branches to |on_failure|
// (or backtracks if it is NULL) when the range does not fit in the input or
// the characters differ.
//
// The expected characters are written to a constant buffer in the character
// width of the current mode and compared against the input with rep cmps.
void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
                                               int cp_offset,
                                               Label* on_failure) {
  int byte_length = str.length() * char_size();
  int start_offset = cp_offset * char_size();

  // edi is a negative byte offset from the end of input, so the compared
  // range ends at edi + start_offset + byte_length.  The range fits while
  // that sum is <= 0 (zero: it ends exactly at the end of input), so only a
  // strictly positive sum is a failure.
  __ mov(ebx, edi);
  __ add(Operand(ebx), Immediate(start_offset + byte_length));
  BranchOrBacktrack(greater, on_failure);

  // Fill the constant buffer.  ASCII mode narrows each character; UC16 mode
  // copies the whole string once.
  ArraySlice constant_buffer = constants_.GetBuffer(str.length(), char_size());
  if (mode_ == ASCII) {
    for (int i = 0; i < str.length(); i++) {
      constant_buffer.at<char>(i) = static_cast<char>(str[i]);
    }
  } else if (str.length() > 0) {
    memcpy(constant_buffer.location<void>(),
           str.start(),
           str.length() * sizeof(uc16));
  }

  // Save edi/esi in eax/ebx: the string compare uses both as pointers.
  __ mov(eax, edi);
  __ mov(ebx, esi);
  __ lea(edi, Operand(esi, edi, times_1, start_offset));
  LoadConstantBufferAddress(esi, &constant_buffer);
  __ mov(ecx, str.length());
  if (mode_ == ASCII) {
    __ rep_cmpsb();
  } else {
    ASSERT(mode_ == UC16);
    __ rep_cmpsw();
  }
  // mov does not modify the flags, so the compare result survives the
  // restore of esi and edi.
  __ mov(esi, ebx);
  __ mov(edi, eax);
  BranchOrBacktrack(not_equal, on_failure);
}
// Compares the current position (edi) with the value stored in regexp
// register |register_index| and branches to |on_equal| (or backtracks if it
// is NULL) when they are equal.
void RegExpMacroAssemblerIA32::CheckCurrentPosition(int register_index,
Label* on_equal) {
__ cmp(edi, register_location(register_index));
BranchOrBacktrack(equal, on_equal);
}
// Compares the input at the current position with the capture delimited by
// registers |start_reg| and |start_reg| + 1, and branches to |on_no_match|
// (or backtracks if it is NULL) when they differ. An empty capture falls
// through without comparing. Case-insensitive matching is not implemented.
// NOTE(review): ecx is the difference of two stored positions, which are
// byte offsets when written from edi, while rep_cmpsw counts 16-bit units --
// confirm the count for UC16 mode.
// NOTE(review): no check is visible that the compared range stays inside the
// input, and edi is restored rather than advanced past the match -- confirm
// both are intended.
void RegExpMacroAssemblerIA32::CheckNotBackReference(
int start_reg, Label* on_no_match) {
if (ignore_case_) {
UNIMPLEMENTED();
}
Label fallthrough;
__ mov(eax, register_location(start_reg));
__ mov(ecx, register_location(start_reg + 1));
__ sub(ecx, Operand(eax)); // Length to check.
__ j(equal, &fallthrough); // Covers the case where it's not bound (-1,-1).
// Save edi (in ebx) and esi (on the stack); the string compare uses both as
// pointers.
__ mov(ebx, Operand(edi));
__ push(esi);
// edi = address of the current position, esi = address of the capture start.
__ add(edi, Operand(esi));
__ add(esi, Operand(eax));
if (mode_ == ASCII) {
__ rep_cmpsb();
} else {
__ rep_cmpsw();
}
__ pop(esi);
__ mov(edi, Operand(ebx));
BranchOrBacktrack(not_equal, on_no_match);
__ bind(&fallthrough);
}
// Compares the current character (edx) with |c| and branches to
// |on_not_equal| (or backtracks if it is NULL) when they differ.
void RegExpMacroAssemblerIA32::CheckNotCharacter(uc16 c, Label* on_not_equal) {
__ cmp(edx, c);
BranchOrBacktrack(not_equal, on_not_equal);
}
// Computes (current character | mask) in eax and branches to |on_not_equal|
// (or backtracks if it is NULL) when the result differs from |c|. edx is left
// untouched.
void RegExpMacroAssemblerIA32::CheckNotCharacterAfterOr(uc16 c,
uc16 mask,
Label* on_not_equal) {
__ mov(eax, Operand(edx));
__ or_(eax, mask);
__ cmp(eax, c);
BranchOrBacktrack(not_equal, on_not_equal);
}
// Computes ((current character - mask) | mask) in eax and branches to
// |on_not_equal| (or backtracks if it is NULL) when the result differs from
// |c|. The subtraction is done with lea, so edx is left untouched.
void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusOr(
uc16 c,
uc16 mask,
Label* on_not_equal) {
__ lea(eax, Operand(edx, -mask));
__ or_(eax, mask);
__ cmp(eax, c);
BranchOrBacktrack(not_equal, on_not_equal);
}
// Dispatches to one of destinations[0..3] based on a two-bit entry looked up
// for (current character - start).
// Unfinished: |half_nibble_map| is not used, and ecx serves both as the copy
// of the character offset and as the map base address (see the FIXME below).
void RegExpMacroAssemblerIA32::DispatchHalfNibbleMap(
uc16 start,
Label* half_nibble_map,
const Vector<Label*>& destinations) {
ReadCurrentChar(eax);
__ sub(Operand(eax), Immediate(start));
__ mov(ecx, eax);
// Four two-bit entries per byte: eax = byte index.
__ shr(eax, 2);
// FIXME: ecx must hold address of map
__ movzx_b(eax, Operand(ecx, eax, times_1, 0));
// ecx = 2 * (offset % 4): bit position of the entry within the byte.
__ and_(ecx, 0x03);
__ add(ecx, Operand(ecx));
__ shr(eax); // Shift right cl times
// Decode the low two bits of eax into one of four jumps.
Label second_bit_set, case_3, case_1;
__ test(eax, Immediate(0x02));
__ j(not_zero, &second_bit_set);
__ test(eax, Immediate(0x01));
__ j(not_zero, &case_1);
// Case 0:
__ jmp(destinations[0]);
__ bind(&case_1);
// Case 1:
__ jmp(destinations[1]);
__ bind(&second_bit_set);
__ test(eax, Immediate(0x01));
__ j(not_zero, &case_3);
// Case 2
__ jmp(destinations[2]);
__ bind(&case_3);
// Case 3:
__ jmp(destinations[3]);
}
// Intended to dispatch through a byte map indexed by
// (current character - start).
// Unfinished: |byte_map| and |destinations| are not used, ecx is read as the
// map address without being loaded, and no jump is emitted -- the byte loaded
// into eax is not consumed, so every path ends at the fallthrough label.
void RegExpMacroAssemblerIA32::DispatchByteMap(
uc16 start,
Label* byte_map,
const Vector<Label*>& destinations) {
Label fallthrough;
ReadCurrentChar(eax);
__ sub(Operand(eax), Immediate(start));
__ cmp(eax, 64); // FIXME: 64 = size of map. Found somehow??
__ j(greater_equal, &fallthrough);
// FIXME: ecx must hold address of map
__ movzx_b(eax, Operand(ecx, eax, times_1, 0));
// jump table: jump to destinations[eax];
__ bind(&fallthrough);
}
// Intended to dispatch on the high byte of the current character, offset by
// |start|.
// Unfinished: |byte_map| is not used and the jump table is still a TODO, so
// every path ends at the fallthrough label.
void RegExpMacroAssemblerIA32::DispatchHighByteMap(
byte start,
Label* byte_map,
const Vector<Label*>& destinations) {
Label fallthrough;
ReadCurrentChar(eax);
__ shr(eax, 8);
__ sub(Operand(eax), Immediate(start));
__ cmp(eax, destinations.length() - start);
__ j(greater_equal, &fallthrough);
// TODO(lrn) jumptable: jump to destinations[eax]
__ bind(&fallthrough);
}
// Not supported by this implementation; calling it hits UNREACHABLE().
void RegExpMacroAssemblerIA32::EmitOrLink(Label* label) {
UNREACHABLE(); // Has no use.
}
// Sets eax to 0 and jumps to the exit code that GetCode() binds at
// exit_label_. The success path sets eax to 1 instead.
void RegExpMacroAssemblerIA32::Fail() {
__ mov(eax, 0);
__ jmp(&exit_label_);
}
// Finishes code generation and returns the resulting Code object.
//
// Emits the entry code (which could not be written earlier because the frame
// size depends on the final register count), the success code that copies
// the saved registers to the caller's output array, and the shared exit
// code, then allocates the Code object from the assembled buffer.
Handle<Object> RegExpMacroAssemblerIA32::GetCode() {
  // Finalize code - write the entry point code now we know how many
  // registers we need.

  // Entry code:
  __ bind(&entry_label_);
  __ push(esi);
  __ push(edi);
  __ enter(Immediate(num_registers_ * sizeof(uint32_t)));
  // edi = start - end: the current position as a negative offset from the
  // end of input.
  __ mov(esi, Operand(ebp, kInputEndOffset));
  __ mov(edi, Operand(ebp, kInputStartOffset));
  __ sub(edi, Operand(esi));
  // esi = *string_base + end offset.
  __ mov(edx, Operand(ebp, kInputBuffer));
  __ mov(edx, Operand(edx, 0));
  __ add(esi, Operand(edx));
  __ jmp(&start_label_);

  // Exit code:
  __ bind(&success_label_);
  // Copy captures to output.
  __ mov(ebx, Operand(ebp, kRegisterOutput));
  __ mov(ecx, Operand(ebp, kInputEndOffset));
  __ sub(ecx, Operand(ebp, kInputStartOffset));
  for (int i = 0; i < num_saved_registers_; i++) {
    __ mov(eax, register_location(i));
    // A register holds (position - end); adding (end - start) yields
    // (position - start), the index from the start of the input.
    __ add(eax, Operand(ecx));  // Convert to index from start, not end.
    __ mov(Operand(ebx, i * sizeof(int32_t)), eax);
  }
  __ mov(eax, Immediate(1));

  // Shared by the success path (eax == 1) and Fail() (eax == 0).
  __ bind(&exit_label_);
  __ leave();
  __ pop(edi);
  __ pop(esi);
  __ ret(0);

  CodeDesc code_desc;
  masm_->GetCode(&code_desc);
  Handle<Code> code = Factory::NewCode(code_desc,
                                       NULL,
                                       Code::ComputeFlags(Code::REGEXP),
                                       self_);
  LOG(CodeCreateEvent("RegExp", *code, "(Compiled RegExp)"));
  return Handle<Object>::cast(code);
}
// Emits an unconditional jump to |to|.
void RegExpMacroAssemblerIA32::GoTo(Label* to) {
__ jmp(to);
}
// Compares regexp register |reg| with |comparand| and branches to |if_ge|
// (or backtracks if it is NULL) on the "greater_equal" condition.
void RegExpMacroAssemblerIA32::IfRegisterGE(int reg,
int comparand,
Label* if_ge) {
__ cmp(register_location(reg), Immediate(comparand));
BranchOrBacktrack(greater_equal, if_ge);
}
// Compares regexp register |reg| with |comparand| and branches to |if_lt|
// (or backtracks if it is NULL) on the "less" condition.
void RegExpMacroAssemblerIA32::IfRegisterLT(int reg,
int comparand,
Label* if_lt) {
__ cmp(register_location(reg), Immediate(comparand));
BranchOrBacktrack(less, if_lt);
}
// Identifies this macro assembler as the IA-32 implementation.
RegExpMacroAssembler::IrregexpImplementation
RegExpMacroAssemblerIA32::Implementation() {
return kIA32Implementation;
}
void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input) {
ASSERT(cp_offset >= 0);
ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
__ cmp(edi, -cp_offset);
BranchOrBacktrack(less_equal, on_end_of_input);
ReadChar(edx, cp_offset);
}
// Pops the top of the stack into the current position (edi).
void RegExpMacroAssemblerIA32::PopCurrentPosition() {
__ pop(edi);
}
// Marks |register_index| as used and pops the top of the stack into its
// slot.
void RegExpMacroAssemblerIA32::PopRegister(int register_index) {
RecordRegister(register_index);
__ pop(register_location(register_index));
}
// Pushes |label| on the backtrack stack, where Backtrack() will later pop it.
//
// Before pushing, esp is compared against the stack guard limit.  When esp is
// not above the limit, the generated code saves the current position and
// character, calls Runtime::kStackGuard until esp is above the limit again,
// and then restores its register state.
void RegExpMacroAssemblerIA32::PushBacktrack(Label* label) {
  Label no_preempt;
  Label retry_preempt;
  // Check for preemption first.
  ExternalReference stack_limit =
      ExternalReference::address_of_stack_guard_limit();
  __ cmp(esp, Operand::StaticVariable(stack_limit));
  __ j(above, &no_preempt);

  __ push(edi);  // Current position.
  __ push(edx);  // Current character.
  // Restore original edi, esi.
  __ mov(edi, Operand(ebp, kBackup_edi));
  __ mov(esi, Operand(ebp, kBackup_esi));

  __ bind(&retry_preempt);
  // simulate stack for Runtime call.
  __ push(Immediate(0));  // Dummy receiver
  __ CallRuntime(Runtime::kStackGuard, 0);
  __ cmp(esp, Operand::StaticVariable(stack_limit));
  __ j(below_equal, &retry_preempt);

  __ pop(edx);
  __ pop(edi);
  // Recompute esi (end of input) from the string base instead of reusing the
  // old value.
  // NOTE(review): presumably because the runtime call may move the string --
  // confirm.
  __ mov(esi, Operand(ebp, kInputBuffer));
  __ mov(esi, Operand(esi, 0));
  __ add(esi, Operand(ebp, kInputEndOffset));

  __ bind(&no_preempt);
  __ push(label, RelocInfo::NONE);
}
// Pushes the current position (edi) on the stack.
void RegExpMacroAssemblerIA32::PushCurrentPosition() {
__ push(edi);
}
// Pushes the value of regexp register |register_index| on the stack.
void RegExpMacroAssemblerIA32::PushRegister(int register_index) {
__ push(register_location(register_index));
}
// Loads the current position (edi) from regexp register |reg|.
void RegExpMacroAssemblerIA32::ReadCurrentPositionFromRegister(int reg) {
__ mov(edi, register_location(reg));
}
// Loads esp (tip of the backtrack stack) from regexp register |reg|.
void RegExpMacroAssemblerIA32::ReadStackPointerFromRegister(int reg) {
__ mov(esp, register_location(reg));
}
// Marks |register_index| as used and stores the constant |to| in its slot.
void RegExpMacroAssemblerIA32::SetRegister(int register_index, int to) {
RecordRegister(register_index);
__ mov(register_location(register_index), Immediate(to));
}
// Jumps to the success code that GetCode() binds at success_label_.
void RegExpMacroAssemblerIA32::Succeed() {
__ jmp(&success_label_);
}
// Stores the current position (edi) in regexp register |register_index|.
// NOTE(review): unlike SetRegister() and PopRegister() this does not call
// RecordRegister(), so the frame reserved in GetCode() only covers the slot
// if the index was recorded elsewhere -- confirm.
void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(
int register_index) {
__ mov(register_location(register_index), edi);
}
// Stores esp (tip of the backtrack stack) in regexp register |reg|.
// NOTE(review): does not call RecordRegister() -- confirm the register is
// recorded elsewhere.
void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) {
__ mov(register_location(reg), esp);
}
// Private methods:
// Returns the ebp-relative operand of a regexp register: register 0 is at
// ebp[-4], register 1 at ebp[-8], and so on.
Operand RegExpMacroAssemblerIA32::register_location(
int register_index) {
ASSERT(register_index < (1<<30));
return Operand(ebp, -((register_index + 1) * sizeof(uint32_t)));
}
// Returns the numeric value of mode_ (ASCII = 1, UC16 = 2), which the callers
// use as the size of one input character in bytes.
size_t RegExpMacroAssemblerIA32::char_size() {
return static_cast<size_t>(mode_);
}
// Emits a branch to |to| taken when |condition| holds.  A negative
// |condition| means "always".  When |to| is NULL the branch target is a
// Backtrack() instead.
void RegExpMacroAssemblerIA32::BranchOrBacktrack(Condition condition,
                                                 Label* to) {
  const bool unconditional = condition < 0;  // No condition

  // A real target: plain jump or conditional jump.
  if (to != NULL) {
    if (unconditional) {
      __ jmp(to);
    } else {
      __ j(condition, to);
    }
    return;
  }

  // No target: backtrack, always or only when the condition holds.
  if (unconditional) {
    Backtrack();
    return;
  }
  Label over_backtrack;
  __ j(NegateCondition(condition), &over_backtrack);
  Backtrack();
  __ bind(&over_backtrack);
}
// Emits code that case-canonicalizes the character in |reg|.  In ASCII mode
// 'a'..'z' are mapped to 'A'..'Z' and everything else is left alone; nothing
// is emitted yet in UC16 mode.
void RegExpMacroAssemblerIA32::Canonicalize(Register reg) {
  if (mode_ != ASCII) {
    ASSERT(mode_ == UC16);
    // TODO(lrn): Use some tables.
    return;
  }

  Label not_lower_case;
  __ cmp(Operand(reg), Immediate('a'));
  __ j(below, &not_lower_case);
  __ cmp(Operand(reg), Immediate('z'));
  __ j(above, &not_lower_case);
  __ sub(Operand(reg), Immediate('a' - 'A'));
  __ bind(&not_lower_case);
}
// Notes that |register_index| is in use by growing num_registers_ so that it
// stays one greater than the highest index recorded.
void RegExpMacroAssemblerIA32::RecordRegister(int register_index) {
  if (register_index < num_registers_) return;  // Already covered.
  num_registers_ = register_index + 1;
}
// Emits a zero-extending load into |destination| of the character |offset|
// characters after the current position (address esi + edi + offset scaled
// to the character width).  No bounds check is emitted here.
void RegExpMacroAssemblerIA32::ReadChar(Register destination, int offset) {
  if (mode_ != ASCII) {
    ASSERT(mode_ == UC16);
    __ movzx_w(destination, Operand(esi, edi, times_1, offset * 2));
    return;
  }
  __ movzx_b(destination, Operand(esi, edi, times_1, offset));
}
// Copies the current character (held in edx) into |destination|.
void RegExpMacroAssemblerIA32::ReadCurrentChar(Register destination) {
__ mov(destination, edx);
}
// Loads into |reg| the address of |buffer|: the array it is a slice of, plus
// the slice's base offset. Only |reg| is written.
void RegExpMacroAssemblerIA32::LoadConstantBufferAddress(Register reg,
ArraySlice* buffer) {
__ mov(reg, buffer->array());
__ add(Operand(reg), Immediate(buffer->base_offset()));
}
#undef __
}}

View File

@ -0,0 +1,162 @@
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef REGEXP_MACRO_ASSEMBLER_IA32_H_
#define REGEXP_MACRO_ASSEMBLER_IA32_H_
#if !(defined __arm__ || defined __thumb__ || defined ARM)
#include "regexp-macro-assembler.h"
#include "macro-assembler-ia32.h"
namespace v8 { namespace internal {
// RegExpMacroAssembler implementation that emits IA-32 machine code through
// a MacroAssembler.
class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
 public:
  // Width of the characters in the subject string.  The enumerator values
  // double as the character size in bytes (see char_size()).
  enum Mode {ASCII = 1, UC16 = 2};

  RegExpMacroAssemblerIA32(Mode mode, int registers_to_save, bool ignore_case);
  virtual ~RegExpMacroAssemblerIA32();
  virtual void AdvanceCurrentPosition(int by);
  virtual void AdvanceRegister(int reg, int by);
  virtual void Backtrack();
  virtual void Bind(Label* label);
  virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
  virtual void CheckCharacter(uc16 c, Label* on_equal);
  virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
  virtual void CheckCharacterLT(uc16 limit, Label* on_less);
  virtual void CheckCharacters(Vector<const uc16> str,
                               int cp_offset,
                               Label* on_failure);
  virtual void CheckCurrentPosition(int register_index, Label* on_equal);
  virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
  virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
  virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
  virtual void CheckNotCharacterAfterMinusOr(uc16 c,
                                             uc16 mask,
                                             Label* on_not_equal);
  virtual void DispatchByteMap(uc16 start,
                               Label* byte_map,
                               const Vector<Label*>& destinations);
  virtual void DispatchHalfNibbleMap(uc16 start,
                                     Label* half_nibble_map,
                                     const Vector<Label*>& destinations);
  virtual void DispatchHighByteMap(byte start,
                                   Label* byte_map,
                                   const Vector<Label*>& destinations);
  virtual void EmitOrLink(Label* label);
  virtual void Fail();
  virtual Handle<Object> GetCode();
  virtual void GoTo(Label* label);
  virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
  virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
  virtual IrregexpImplementation Implementation();
  virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
  virtual void PopCurrentPosition();
  virtual void PopRegister(int register_index);
  virtual void PushBacktrack(Label* label);
  virtual void PushCurrentPosition();
  virtual void PushRegister(int register_index);
  virtual void ReadCurrentPositionFromRegister(int reg);
  virtual void ReadStackPointerFromRegister(int reg);
  virtual void SetRegister(int register_index, int to);
  virtual void Succeed();
  virtual void WriteCurrentPositionToRegister(int reg);
  virtual void WriteStackPointerToRegister(int reg);

 private:
  // Offsets from ebp of arguments to function.
  static const int kBackup_edi = 1 * sizeof(uint32_t);
  static const int kBackup_esi= 2 * sizeof(uint32_t);
  static const int kInputBuffer = 4 * sizeof(uint32_t);
  static const int kInputStartOffset = 5 * sizeof(uint32_t);
  static const int kInputEndOffset = 6 * sizeof(uint32_t);
  static const int kRegisterOutput = 7 * sizeof(uint32_t);

  // The ebp-relative location of a regexp register.
  Operand register_location(int register_index);

  // Whether to implement case-insensitive matching.
  bool ignore_case();

  // Byte size of chars in the string to match (decided by the Mode argument)
  size_t char_size();

  // Records that a register is used. At the end, we need the number of
  // registers used.
  void RecordRegister(int register_index);

  // Equivalent to a conditional branch to the label, unless the label
  // is NULL, in which case it is a conditional Backtrack.
  void BranchOrBacktrack(Condition condition, Label* to);

  // Generate code to perform case-canonicalization on the register.
  // (The parameter is named |reg| because `register` is a C++ keyword.)
  void Canonicalize(Register reg);

  // Read a character from input at the given offset from the current
  // position.
  void ReadChar(Register destination, int offset);

  // Load the address of a "constant buffer" (a slice of a byte array)
  // into a register. The address is computed from the ByteArray* address
  // and an offset. Uses no extra registers.
  void LoadConstantBufferAddress(Register reg, ArraySlice* buffer);

  // Read the current character into the destination register.
  void ReadCurrentChar(Register destination);

  // Initial size of code buffer.
  static const size_t kRegExpCodeSize = 1024;
  // Initial size of constant buffers allocated during compilation.
  static const int kRegExpConstantsSize = 256;
  // Only unroll loops up to this length.
  static const int kMaxInlineStringTests = 8;
  // Special "character" marking end of input.
  static const uint32_t kEndOfInput = ~0;

  // Assembler the code is emitted through; owned by this object.
  MacroAssembler* masm_;
  // Provider of the constant buffers used by CheckCharacters.
  ByteArrayProvider constants_;
  // Which mode to generate code for (ASCII or UC16).
  Mode mode_;
  // One greater than maximal register index actually used.
  int num_registers_;
  // Number of registers to output at the end (the saved registers
  // are always 0..num_saved_registers_-1)
  int num_saved_registers_;
  // Whether to generate code that is case-insensitive. Only relevant for
  // back-references.
  bool ignore_case_;
  // Entry code, written last by GetCode().
  Label entry_label_;
  // First instruction after the initial jump to the entry code.
  Label start_label_;
  // Exit paths bound by GetCode(): success and common exit.
  Label success_label_;
  Label exit_label_;
  // Handle used to represent the generated code object itself.
  Handle<Object> self_;
};
}}
#endif // !ARM
#endif /* REGEXP_MACRO_ASSEMBLER_IA32_H_ */

View File

@ -0,0 +1,266 @@
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "v8.h"
#include "ast.h"
#include "bytecodes-irregexp.h"
#include "assembler-irregexp.h"
#include "assembler-irregexp-inl.h"
#include "regexp-macro-assembler.h"
#include "regexp-macro-assembler-irregexp.h"
namespace v8 { namespace internal {
// Nothing to release here: the destructor leaves assembler_ untouched.
RegExpMacroAssemblerIrregexp::~RegExpMacroAssemblerIrregexp() {
}
// Identifies this macro assembler as the bytecode implementation.
RegExpMacroAssemblerIrregexp::IrregexpImplementation
RegExpMacroAssemblerIrregexp::Implementation() {
return kBytecodeImplementation;
}
// Forwards to assembler_->Bind().
void RegExpMacroAssemblerIrregexp::Bind(Label* l) {
assembler_->Bind(l);
}
// Forwards to assembler_->EmitOrLink().
void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) {
assembler_->EmitOrLink(l);
}
// Forwards to assembler_->PopRegister().
void RegExpMacroAssemblerIrregexp::PopRegister(int register_index) {
assembler_->PopRegister(register_index);
}
// Forwards to assembler_->PushRegister().
void RegExpMacroAssemblerIrregexp::PushRegister(int register_index) {
assembler_->PushRegister(register_index);
}
// Forwards to assembler_->WriteCurrentPositionToRegister().
void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister(
int register_index) {
assembler_->WriteCurrentPositionToRegister(register_index);
}
// Forwards to assembler_->ReadCurrentPositionFromRegister().
void RegExpMacroAssemblerIrregexp::ReadCurrentPositionFromRegister(
int register_index) {
assembler_->ReadCurrentPositionFromRegister(register_index);
}
// Forwards to assembler_->WriteStackPointerToRegister().
void RegExpMacroAssemblerIrregexp::WriteStackPointerToRegister(
int register_index) {
assembler_->WriteStackPointerToRegister(register_index);
}
// Forwards to assembler_->ReadStackPointerFromRegister().
void RegExpMacroAssemblerIrregexp::ReadStackPointerFromRegister(
int register_index) {
assembler_->ReadStackPointerFromRegister(register_index);
}
// Forwards to assembler_->SetRegister().
void RegExpMacroAssemblerIrregexp::SetRegister(int register_index, int to) {
assembler_->SetRegister(register_index, to);
}
// Forwards to assembler_->AdvanceRegister().
void RegExpMacroAssemblerIrregexp::AdvanceRegister(int register_index, int by) {
assembler_->AdvanceRegister(register_index, by);
}
// Forwards to assembler_->PopCurrentPosition().
void RegExpMacroAssemblerIrregexp::PopCurrentPosition() {
assembler_->PopCurrentPosition();
}
// Forwards to assembler_->PushCurrentPosition().
void RegExpMacroAssemblerIrregexp::PushCurrentPosition() {
assembler_->PushCurrentPosition();
}
// Backtracking maps to assembler_->PopBacktrack().
void RegExpMacroAssemblerIrregexp::Backtrack() {
assembler_->PopBacktrack();
}
// Forwards to assembler_->GoTo().
void RegExpMacroAssemblerIrregexp::GoTo(Label* l) {
assembler_->GoTo(l);
}
// Forwards to assembler_->PushBacktrack().
void RegExpMacroAssemblerIrregexp::PushBacktrack(Label* l) {
assembler_->PushBacktrack(l);
}
// Forwards to assembler_->Succeed().
void RegExpMacroAssemblerIrregexp::Succeed() {
assembler_->Succeed();
}
// Forwards to assembler_->Fail().
void RegExpMacroAssemblerIrregexp::Fail() {
assembler_->Fail();
}
// Advancing the current position maps to assembler_->AdvanceCP().
void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) {
assembler_->AdvanceCP(by);
}
// Not implemented for the bytecode backend yet; calling it hits
// UNREACHABLE().
void RegExpMacroAssemblerIrregexp::CheckCurrentPosition(
int register_index,
Label* on_equal) {
// TODO(erikcorry): Implement.
UNREACHABLE();
}
// Forwards to assembler_->LoadCurrentChar().
void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset,
Label* on_failure) {
assembler_->LoadCurrentChar(cp_offset, on_failure);
}
// Forwards to assembler_->CheckCharacterLT().
void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit,
Label* on_less) {
assembler_->CheckCharacterLT(limit, on_less);
}
// Forwards to assembler_->CheckCharacterGT().
void RegExpMacroAssemblerIrregexp::CheckCharacterGT(uc16 limit,
Label* on_greater) {
assembler_->CheckCharacterGT(limit, on_greater);
}
// Forwards to assembler_->CheckCharacter().
void RegExpMacroAssemblerIrregexp::CheckCharacter(uc16 c, Label* on_equal) {
assembler_->CheckCharacter(c, on_equal);
}
// Forwards to assembler_->CheckNotCharacter().
void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uc16 c,
Label* on_not_equal) {
assembler_->CheckNotCharacter(c, on_not_equal);
}
// Maps to assembler_->OrThenCheckNotCharacter().
void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterOr(uc16 c,
uc16 mask,
Label* on_not_equal) {
assembler_->OrThenCheckNotCharacter(c, mask, on_not_equal);
}
// Maps to assembler_->MinusOrThenCheckNotCharacter().
void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusOr(
uc16 c,
uc16 mask,
Label* on_not_equal) {
assembler_->MinusOrThenCheckNotCharacter(c, mask, on_not_equal);
}
// Forwards to assembler_->CheckNotBackReference().
void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg,
Label* on_not_equal) {
assembler_->CheckNotBackReference(start_reg, on_not_equal);
}
// The bitmap check maps to assembler_->LookupMap1().
void RegExpMacroAssemblerIrregexp::CheckBitmap(uc16 start,
Label* bitmap,
Label* on_zero) {
assembler_->LookupMap1(start, bitmap, on_zero);
}
// The half-nibble dispatch maps to assembler_->LookupMap2().
void RegExpMacroAssemblerIrregexp::DispatchHalfNibbleMap(
uc16 start,
Label* half_nibble_map,
const Vector<Label*>& table) {
assembler_->LookupMap2(start, half_nibble_map, table);
}
// The byte-map dispatch maps to assembler_->LookupMap8().
void RegExpMacroAssemblerIrregexp::DispatchByteMap(
uc16 start,
Label* byte_map,
const Vector<Label*>& table) {
assembler_->LookupMap8(start, byte_map, table);
}
// The high-byte dispatch maps to assembler_->LookupHighMap8().
void RegExpMacroAssemblerIrregexp::DispatchHighByteMap(
byte start,
Label* byte_map,
const Vector<Label*>& table) {
assembler_->LookupHighMap8(start, byte_map, table);
}
// Emits, for every character of |str|, a load of the input character at the
// matching offset from |cp_offset| followed by a not-equal check against
// |on_failure|.  The characters are visited from the last one to the first.
void RegExpMacroAssemblerIrregexp::CheckCharacters(
    Vector<const uc16> str,
    int cp_offset,
    Label* on_failure) {
  int index = str.length();
  while (index-- > 0) {
    assembler_->LoadCurrentChar(cp_offset + index, on_failure);
    assembler_->CheckNotCharacter(str[index], on_failure);
  }
}
// Maps to assembler_->CheckRegisterLT(). The comparand must be in
// [0, 65535].
void RegExpMacroAssemblerIrregexp::IfRegisterLT(int register_index,
int comparand,
Label* if_less_than) {
ASSERT(comparand >= 0 && comparand <= 65535);
assembler_->CheckRegisterLT(register_index, comparand, if_less_than);
}
// Maps to assembler_->CheckRegisterGE(). The comparand must be in
// [0, 65535].
void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index,
int comparand,
Label* if_greater_or_equal) {
ASSERT(comparand >= 0 && comparand <= 65535);
assembler_->CheckRegisterGE(register_index, comparand, if_greater_or_equal);
}
// Allocates a ByteArray of assembler_->length() bytes, has the assembler copy
// its output into it, and returns the array.
Handle<Object> RegExpMacroAssemblerIrregexp::GetCode() {
Handle<ByteArray> array = Factory::NewByteArray(assembler_->length());
assembler_->Copy(array->GetDataStartAddress());
return array;
}
} } // namespace v8::internal

View File

@ -0,0 +1,93 @@
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef V8_REGEXP_MACRO_ASSEMBLER_IRREGEXP_H_
#define V8_REGEXP_MACRO_ASSEMBLER_IRREGEXP_H_
namespace v8 { namespace internal {
// RegExpMacroAssembler implementation built around an IrregexpAssembler that
// the caller passes in. Every virtual declared here overrides the pure
// virtual of the same name in RegExpMacroAssembler; the definitions live in
// the matching .cc file.
// NOTE(review): whether this class owns assembler_ is not visible from this
// header -- check the destructor before relying on either answer.
class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
public:
// Stores the assembler pointer; nothing is emitted at construction time.
explicit RegExpMacroAssemblerIrregexp(IrregexpAssembler* assembler)
: assembler_(assembler) {
}
virtual ~RegExpMacroAssemblerIrregexp();
// Label handling.
virtual void Bind(Label* label);
virtual void EmitOrLink(Label* label);
// Current-position and control-flow operations.
virtual void AdvanceCurrentPosition(int by); // Signed cp change.
virtual void PopCurrentPosition();
virtual void PushCurrentPosition();
virtual void Backtrack();
virtual void GoTo(Label* label);
virtual void PushBacktrack(Label* label);
virtual void Succeed();
virtual void Fail();
// Register operations.
virtual void PopRegister(int register_index);
virtual void PushRegister(int register_index);
virtual void AdvanceRegister(int reg, int by); // r[reg] += by.
virtual void SetRegister(int register_index, int to);
virtual void WriteCurrentPositionToRegister(int reg);
virtual void ReadCurrentPositionFromRegister(int reg);
virtual void WriteStackPointerToRegister(int reg);
virtual void ReadStackPointerFromRegister(int reg);
// Character loading and tests.
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacter(uc16 c, Label* on_equal);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
// The second parameter is called or_with / minus_then_or_with in the base
// class declaration; here it is named mask.
virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
virtual void CheckNotCharacterAfterMinusOr(uc16 c,
uc16 mask,
Label* on_not_equal);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure);
virtual void CheckCurrentPosition(int register_index, Label* on_equal);
// Table-driven tests and dispatch.
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
virtual void DispatchHalfNibbleMap(uc16 start,
Label* half_nibble_map,
const Vector<Label*>& destinations);
virtual void DispatchByteMap(uc16 start,
Label* byte_map,
const Vector<Label*>& destinations);
virtual void DispatchHighByteMap(byte start,
Label* byte_map,
const Vector<Label*>& destinations);
// Register comparisons.
virtual void IfRegisterLT(int register_index, int comparand, Label* if_lt);
virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge);
// NOTE(review): presumably reports kBytecodeImplementation -- confirm in the
// .cc file.
virtual IrregexpImplementation Implementation();
virtual Handle<Object> GetCode();
private:
// Assembler passed to the constructor.
IrregexpAssembler* assembler_;
};
} } // namespace v8::internal
#endif // V8_REGEXP_MACRO_ASSEMBLER_IRREGEXP_H_

View File

@ -0,0 +1,77 @@
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <string.h>
#include "v8.h"
#include "ast.h"
#include "assembler.h"
#include "regexp-macro-assembler.h"
namespace v8 { namespace internal {
// The base class declares no data members, so there is nothing to set up.
RegExpMacroAssembler::RegExpMacroAssembler() {}
// Out-of-line definition of the virtual destructor; nothing to release.
RegExpMacroAssembler::~RegExpMacroAssembler() {}
// Records initial_size as the size used for shared byte arrays. No array is
// allocated here: the free offset starts out equal to the array size (as if
// the array were already full), so the first GetBuffer() request cannot fit
// and has to allocate.
ByteArrayProvider::ByteArrayProvider(unsigned int initial_size)
: byte_array_size_(initial_size),
current_byte_array_(),
current_byte_array_free_offset_(initial_size) {}
// Returns a slice with room for "size" elements of "elem_size" bytes each.
//
// Small requests are carved out of a shared byte array of byte_array_size_
// bytes, at an offset rounded up to a multiple of elem_size. When the request
// does not fit in what is left of the shared array:
//   - a request larger than half of byte_array_size_ gets a dedicated array
//     of exactly the requested size, and the shared array is left untouched;
//   - otherwise a new shared array is allocated and the slice starts at
//     offset 0 of it.
// Both size and elem_size must be non-zero.
ArraySlice ByteArrayProvider::GetBuffer(unsigned int size,
                                        unsigned int elem_size) {
  ASSERT(size > 0);
  // elem_size is used as a divisor in the alignment step below.
  ASSERT(elem_size > 0);
  size_t byte_size = size * elem_size;
  int free_offset = current_byte_array_free_offset_;
  // Align elements: round the free offset up to a multiple of elem_size.
  free_offset += elem_size - 1;
  free_offset = free_offset - (free_offset % elem_size);

  if (free_offset + byte_size > byte_array_size_) {
    if (byte_size > (byte_array_size_ / 2)) {
      Handle<ByteArray> solo_buffer(Factory::NewByteArray(byte_size, TENURED));
      return ArraySlice(solo_buffer, 0);
    }
    current_byte_array_ = Factory::NewByteArray(byte_array_size_, TENURED);
    free_offset = 0;
  }
  // Reserve the slice in bytes. Advancing by the element count instead would
  // let the next slice overlap this one whenever elem_size > 1.
  current_byte_array_free_offset_ = static_cast<int>(free_offset + byte_size);
  return ArraySlice(current_byte_array_, free_offset);
}
// Obtains a slice sized for all elements of "values" and copies the elements
// into it byte for byte.
template <typename T>
ArraySlice ByteArrayProvider::GetBuffer(Vector<T> values) {
  const int element_count = values.length();
  ArraySlice target = GetBuffer(element_count, sizeof(T));
  memcpy(target.location<void>(), values.start(), element_count * sizeof(T));
  return target;
}
} }

View File

@ -0,0 +1,181 @@
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef V8_REGEXP_MACRO_ASSEMBLER_H_
#define V8_REGEXP_MACRO_ASSEMBLER_H_
namespace v8 { namespace internal {
// Pairs a character class with a label.
// NOTE(review): presumably on_match is the jump target taken when the current
// character is in cc; no user of this struct is visible here -- confirm.
struct DisjunctDecisionRow {
RegExpCharacterClass cc;
Label* on_match;
};
// Abstract interface of the regexp macro-assembler operations. Every
// operation is pure virtual and has to be supplied by a concrete subclass;
// Implementation() tells callers which kind of subclass they are talking to.
class RegExpMacroAssembler {
public:
// The kinds of implementation a subclass can report from Implementation().
enum IrregexpImplementation {
kIA32Implementation,
kARMImplementation,
kBytecodeImplementation};
RegExpMacroAssembler();
virtual ~RegExpMacroAssembler();
virtual void AdvanceCurrentPosition(int by) = 0; // Signed cp change.
virtual void AdvanceRegister(int reg, int by) = 0; // r[reg] += by.
virtual void Backtrack() = 0;
virtual void Bind(Label* label) = 0;
// Check the current character against a bitmap. The range of the current
// character must be from start to start + length_of_bitmap_in_bits.
virtual void CheckBitmap(
uc16 start, // The bitmap is indexed from this character.
Label* bitmap, // Where the bitmap is emitted.
Label* on_zero) = 0; // Where to go if the bit is 0. Fall through on 1.
virtual void CheckCharacter(uc16 c, Label* on_equal) = 0;
virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
// Check the current character for a match with a literal string. If we
// fail to match then goto the on_failure label. End of input always
// matches. If the label is NULL then we should pop a backtrack address off
// the stack and go to that.
virtual void CheckCharacters(
Vector<const uc16> str,
int cp_offset,
Label* on_failure) = 0;
// Check the current input position against a register. If the register is
// equal to the current position then go to the label. If the label is NULL
// then backtrack instead.
virtual void CheckCurrentPosition(
int register_index,
Label* on_equal) = 0;
virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;
// Check the current character for a match with a literal character. If we
// fail to match then goto the on_not_equal label. End of input always
// matches. If the label is NULL then we should pop a backtrack address off
// the stack and go to that.
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal) = 0;
// Bitwise or the current character with the given constant and then
// check for a match with c.
virtual void CheckNotCharacterAfterOr(uc16 c,
uc16 or_with,
Label* on_not_equal) = 0;
// Subtract a constant from the current character, then or with the given
// constant and then check for a match with c.
virtual void CheckNotCharacterAfterMinusOr(uc16 c,
uc16 minus_then_or_with,
Label* on_not_equal) = 0;
// Dispatch after looking the current character up in a byte map. The
// destinations vector has up to 256 labels.
virtual void DispatchByteMap(
uc16 start,
Label* byte_map,
const Vector<Label*>& destinations) = 0;
// Dispatch after looking the current character up in a 2-bits-per-entry
// map. The destinations vector has up to 4 labels.
virtual void DispatchHalfNibbleMap(
uc16 start,
Label* half_nibble_map,
const Vector<Label*>& destinations) = 0;
// Dispatch after looking the high byte of the current character up in a byte
// map. The destinations vector has up to 256 labels.
virtual void DispatchHighByteMap(
byte start,
Label* byte_map,
const Vector<Label*>& destinations) = 0;
virtual void EmitOrLink(Label* label) = 0;
virtual void Fail() = 0;
// Returns the generated code as a heap object handle.
virtual Handle<Object> GetCode() = 0;
virtual void GoTo(Label* label) = 0;
// Check whether a register is >= a given constant and go to a label if it
// is. Backtracks instead if the label is NULL.
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge) = 0;
// Check whether a register is < a given constant and go to a label if it is.
// Backtracks instead if the label is NULL.
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
virtual IrregexpImplementation Implementation() = 0;
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input) = 0;
virtual void PopCurrentPosition() = 0;
virtual void PopRegister(int register_index) = 0;
virtual void PushBacktrack(Label* label) = 0;
virtual void PushCurrentPosition() = 0;
virtual void PushRegister(int register_index) = 0;
virtual void ReadCurrentPositionFromRegister(int reg) = 0;
virtual void ReadStackPointerFromRegister(int reg) = 0;
virtual void SetRegister(int register_index, int to) = 0;
virtual void Succeed() = 0;
virtual void WriteCurrentPositionToRegister(int reg) = 0;
virtual void WriteStackPointerToRegister(int reg) = 0;
private:
};
// A position inside a ByteArray: the array handle plus a byte offset into the
// array's data area.
struct ArraySlice {
 public:
  ArraySlice(Handle<ByteArray> array, size_t offset)
      : array_(array), offset_(offset) {}

  // The array this slice points into.
  Handle<ByteArray> array() { return array_; }

  // Offset in the byte array data.
  size_t offset() { return offset_; }

  // Offset from the ByteArray pointer.
  size_t base_offset() {
    const size_t slice_offset = offset_;
    return ByteArray::kHeaderSize - kHeapObjectTag + slice_offset;
  }

  // Address of the slice's first byte, viewed as a T*.
  template <typename T>
  T* location() {
    return reinterpret_cast<T*>(array_->GetDataStartAddress() + offset_);
  }

  // Reference to the idx'th T counted from the start of the slice.
  template <typename T>
  T& at(int idx) {
    T* elements = reinterpret_cast<T*>(array_->GetDataStartAddress() + offset_);
    return elements[idx];
  }

 private:
  Handle<ByteArray> array_;
  size_t offset_;
};
// Hands out slices of tenured ByteArrays, packing several small requests
// into one shared array.
class ByteArrayProvider {
public:
// initial_size is the size, in bytes, of each shared array.
explicit ByteArrayProvider(unsigned int initial_size);
// Provides a place to put "size" elements of size "element_size".
// The information can be stored in the ByteArray of the returned slice, at
// the slice's offset. The offset is aligned to the element size.
ArraySlice GetBuffer(unsigned int size,
unsigned int element_size);
// Provides a slice sized for "values" and copies the elements into it.
template <typename T>
ArraySlice GetBuffer(Vector<T> values);
private:
// Size in bytes used when allocating a shared array.
size_t byte_array_size_;
// The shared array that small requests are currently carved from.
Handle<ByteArray> current_byte_array_;
// Offset of the first unused byte in current_byte_array_.
int current_byte_array_free_offset_;
};
} } // namespace v8::internal
#endif // V8_REGEXP_MACRO_ASSEMBLER_H_

View File

@ -288,7 +288,7 @@ static Object* Runtime_IsConstructCall(Arguments args) {
static Object* Runtime_RegExpCompile(Arguments args) {
HandleScope scope; // create a new handle scope
HandleScope scope;
ASSERT(args.length() == 3);
CONVERT_CHECKED(JSRegExp, raw_re, args[0]);
Handle<JSRegExp> re(raw_re);
@ -786,7 +786,9 @@ static Object* Runtime_RegExpExec(Arguments args) {
Handle<String> subject(raw_subject);
Handle<Object> index(args[2]);
ASSERT(index->IsNumber());
return *RegExpImpl::Exec(regexp, subject, index);
Handle<Object> result = RegExpImpl::Exec(regexp, subject, index);
if (result.is_null()) return Failure::Exception();
return *result;
}
@ -797,7 +799,9 @@ static Object* Runtime_RegExpExecGlobal(Arguments args) {
Handle<JSRegExp> regexp(raw_regexp);
CONVERT_CHECKED(String, raw_subject, args[1]);
Handle<String> subject(raw_subject);
return *RegExpImpl::ExecGlobal(regexp, subject);
Handle<Object> result = RegExpImpl::ExecGlobal(regexp, subject);
if (result.is_null()) return Failure::Exception();
return *result;
}
@ -2444,7 +2448,7 @@ static Object* ConvertCase(Arguments args,
// in the buffer
Access<StringInputBuffer> buffer(&string_input_buffer);
buffer->Reset(s);
unibrow::uchar chars[unibrow::kMaxCaseConvertedSize];
unibrow::uchar chars[Converter::kMaxWidth];
int i = 0;
// We can assume that the string is not empty
uc32 current = buffer->GetNext();

View File

@ -93,13 +93,13 @@ static bool IsControlChar(char c) {
}
void StringStream::Add(const char* format, Vector<FmtElm> elms) {
void StringStream::Add(Vector<const char> format, Vector<FmtElm> elms) {
// If we already ran out of space then return immediately.
if (space() == 0)
return;
int offset = 0;
int elm = 0;
while (format[offset] != '\0') {
while (offset < format.length()) {
if (format[offset] != '%' || elm == elms.length()) {
Put(format[offset]);
offset++;
@ -111,12 +111,11 @@ void StringStream::Add(const char* format, Vector<FmtElm> elms) {
// Skip over the whole control character sequence until the
// format element type
temp[format_length++] = format[offset++];
// '\0' is not a control character so we don't have to
// explicitly check for the end of the string
while (IsControlChar(format[offset]))
while (offset < format.length() && IsControlChar(format[offset]))
temp[format_length++] = format[offset++];
if (offset >= format.length())
return;
char type = format[offset];
if (type == '\0') return;
temp[format_length++] = type;
temp[format_length] = '\0';
offset++;
@ -128,17 +127,36 @@ void StringStream::Add(const char* format, Vector<FmtElm> elms) {
Add(value);
break;
}
case 'w': {
ASSERT_EQ(FmtElm::LC_STR, current.type_);
Vector<const uc16> value = *current.data_.u_lc_str_;
for (int i = 0; i < value.length(); i++)
Put(static_cast<char>(value[i]));
break;
}
case 'o': {
ASSERT_EQ(FmtElm::OBJ, current.type_);
Object* obj = current.data_.u_obj_;
PrintObject(obj);
break;
}
case 'i': case 'd': case 'u': case 'x': case 'c': case 'p': {
case 'k': {
ASSERT_EQ(FmtElm::INT, current.type_);
int value = current.data_.u_int_;
if (0x20 <= value && value <= 0x7F) {
Put(value);
} else if (value <= 0xff) {
Add("\\x%02x", value);
} else {
Add("\\u%04x", value);
}
break;
}
case 'i': case 'd': case 'u': case 'x': case 'c': case 'p': case 'X': {
int value = current.data_.u_int_;
EmbeddedVector<char, 24> formatted;
OS::SNPrintF(formatted, temp.start(), value);
Add(formatted.start());
int length = OS::SNPrintF(formatted, temp.start(), value);
Add(Vector<const char>(formatted.start(), length));
break;
}
case 'f': case 'g': case 'G': case 'e': case 'E': {
@ -154,10 +172,8 @@ void StringStream::Add(const char* format, Vector<FmtElm> elms) {
}
}
// Verify that the buffer is 0-terminated and doesn't contain any
// other 0-characters.
// Verify that the buffer is 0-terminated
ASSERT(buffer_[length_] == '\0');
ASSERT(strlen(buffer_) == length_);
}
@ -188,6 +204,11 @@ void StringStream::PrintObject(Object* o) {
void StringStream::Add(const char* format) {
Add(CStrVector(format));
}
// Appends a length-delimited format string that takes no arguments.
void StringStream::Add(Vector<const char> format) {
  Vector<FmtElm> no_elements = Vector<FmtElm>::empty();
  Add(format, no_elements);
}
@ -195,14 +216,14 @@ void StringStream::Add(const char* format) {
void StringStream::Add(const char* format, FmtElm arg0) {
const char argc = 1;
FmtElm argv[argc] = { arg0 };
Add(format, Vector<FmtElm>(argv, argc));
Add(CStrVector(format), Vector<FmtElm>(argv, argc));
}
void StringStream::Add(const char* format, FmtElm arg0, FmtElm arg1) {
const char argc = 2;
FmtElm argv[argc] = { arg0, arg1 };
Add(format, Vector<FmtElm>(argv, argc));
Add(CStrVector(format), Vector<FmtElm>(argv, argc));
}
@ -210,7 +231,15 @@ void StringStream::Add(const char* format, FmtElm arg0, FmtElm arg1,
FmtElm arg2) {
const char argc = 3;
FmtElm argv[argc] = { arg0, arg1, arg2 };
Add(format, Vector<FmtElm>(argv, argc));
Add(CStrVector(format), Vector<FmtElm>(argv, argc));
}
void StringStream::Add(const char* format, FmtElm arg0, FmtElm arg1,
FmtElm arg2, FmtElm arg3) {
const char argc = 4;
FmtElm argv[argc] = { arg0, arg1, arg2, arg3 };
Add(CStrVector(format), Vector<FmtElm>(argv, argc));
}

View File

@ -75,17 +75,19 @@ class FmtElm {
FmtElm(int value) : type_(INT) { data_.u_int_ = value; } // NOLINT
explicit FmtElm(double value) : type_(DOUBLE) { data_.u_double_ = value; } // NOLINT
FmtElm(const char* value) : type_(C_STR) { data_.u_c_str_ = value; } // NOLINT
FmtElm(const Vector<const uc16>& value) : type_(LC_STR) { data_.u_lc_str_ = &value; } // NOLINT
FmtElm(Object* value) : type_(OBJ) { data_.u_obj_ = value; } // NOLINT
FmtElm(Handle<Object> value) : type_(HANDLE) { data_.u_handle_ = value.location(); } // NOLINT
FmtElm(void* value) : type_(INT) { data_.u_int_ = reinterpret_cast<int>(value); } // NOLINT
private:
friend class StringStream;
enum Type { INT, DOUBLE, C_STR, OBJ, HANDLE };
enum Type { INT, DOUBLE, C_STR, LC_STR, OBJ, HANDLE };
Type type_;
union {
int u_int_;
double u_double_;
const char* u_c_str_;
const Vector<const uc16>* u_lc_str_;
Object* u_obj_;
Object** u_handle_;
} data_;
@ -108,11 +110,17 @@ class StringStream {
bool Put(char c);
bool Put(String* str);
bool Put(String* str, int start, int end);
void Add(const char* format, Vector<FmtElm> elms);
void Add(Vector<const char> format, Vector<FmtElm> elms);
void Add(const char* format);
void Add(Vector<const char> format);
void Add(const char* format, FmtElm arg0);
void Add(const char* format, FmtElm arg0, FmtElm arg1);
void Add(const char* format, FmtElm arg0, FmtElm arg1, FmtElm arg2);
void Add(const char* format,
FmtElm arg0,
FmtElm arg1,
FmtElm arg2,
FmtElm arg3);
// Getting the message out.
void OutputToStdOut();

View File

@ -892,7 +892,7 @@ Object* StubCompiler::CompileCallDebugPrepareStepIn(Code::Flags flags) {
Object* StubCompiler::GetCodeWithFlags(Code::Flags flags) {
CodeDesc desc;
masm_.GetCode(&desc);
Object* result = Heap::CreateCode(desc, NULL, flags);
Object* result = Heap::CreateCode(desc, NULL, flags, NULL);
#ifdef DEBUG
if (FLAG_print_code_stubs && !result->IsFailure()) {
Code::cast(result)->Print();

File diff suppressed because one or more lines are too long

View File

@ -44,7 +44,7 @@ typedef unsigned char byte;
* The max length of the result of converting the case of a single
* character.
*/
static const int kMaxCaseConvertedSize = 3;
static const int kMaxMappingSize = 4;
template <class T, int size = 256>
class Predicate {
@ -80,12 +80,13 @@ class Mapping {
friend class Test;
int CalculateValue(uchar c, uchar n, uchar* result);
struct CacheEntry {
inline CacheEntry() : code_point_(0), offset_(0) { }
inline CacheEntry() : code_point_(kNoChar), offset_(0) { }
inline CacheEntry(uchar code_point, signed offset)
: code_point_(code_point),
offset_(offset) { }
uchar code_point_ : 21;
signed offset_ : 11;
uchar code_point_;
signed offset_;
static const int kNoChar = (1 << 21) - 1;
};
static const int kSize = size;
static const int kMask = kSize - 1;
@ -222,45 +223,15 @@ struct Letter {
struct Space {
static bool Is(uchar c);
};
struct Titlecase {
static bool Is(uchar c);
};
struct Number {
static bool Is(uchar c);
};
struct DecimalDigit {
static bool Is(uchar c);
};
struct Ideographic {
static bool Is(uchar c);
};
struct WhiteSpace {
static bool Is(uchar c);
};
struct HexDigit {
static bool Is(uchar c);
};
struct AsciiHexDigit {
static bool Is(uchar c);
};
struct BidiControl {
static bool Is(uchar c);
};
struct JoinControl {
static bool Is(uchar c);
};
struct Dash {
static bool Is(uchar c);
};
struct Hyphen {
static bool Is(uchar c);
};
struct LineTerminator {
static bool Is(uchar c);
};
struct RegExpSpecialChar {
static bool Is(uchar c);
};
struct CombiningMark {
static bool Is(uchar c);
};
@ -268,12 +239,35 @@ struct ConnectorPunctuation {
static bool Is(uchar c);
};
// Converter with a static Convert function and a kMaxWidth of 3.
// NOTE(review): presumably kMaxWidth is the largest number of uchars Convert
// may write to result, and the return value is the count written -- confirm
// against the definition in unicode.cc.
struct ToLowercase {
static const int kMaxWidth = 3;
static int Convert(uchar c,
uchar n,
uchar* result,
bool* allow_caching_ptr);
};
// Converter with a static Convert function and a kMaxWidth of 3.
// NOTE(review): presumably kMaxWidth is the largest number of uchars Convert
// may write to result, and the return value is the count written -- confirm
// against the definition in unicode.cc.
struct ToUppercase {
static const int kMaxWidth = 3;
static int Convert(uchar c,
uchar n,
uchar* result,
bool* allow_caching_ptr);
};
// Converter with a static Convert function and a kMaxWidth of 1.
// NOTE(review): the name suggests the ECMA-262 regexp Canonicalize operation
// -- confirm against the definition in unicode.cc.
struct Ecma262Canonicalize {
static const int kMaxWidth = 1;
static int Convert(uchar c,
uchar n,
uchar* result,
bool* allow_caching_ptr);
};
// Converter with a static Convert function and a kMaxWidth of 4.
// NOTE(review): the name suggests the inverse of Ecma262Canonicalize, i.e.
// up to kMaxWidth characters per input character -- confirm against the
// definition in unicode.cc.
struct Ecma262UnCanonicalize {
static const int kMaxWidth = 4;
static int Convert(uchar c,
uchar n,
uchar* result,
bool* allow_caching_ptr);
};
struct CanonicalizationRange {
static const int kMaxWidth = 1;
static int Convert(uchar c,
uchar n,
uchar* result,

View File

@ -83,6 +83,23 @@ static inline T RoundUp(T x, int m) {
}
// Three-way comparison built from operator== and operator<: returns 0 when
// the values are equal, -1 when a orders before b, and 1 otherwise.
template <typename T>
static int Spaceship(const T& a, const T& b) {
  if (a == b) return 0;
  return (a < b) ? -1 : 1;
}
// Three-way comparison of the values two pointers refer to; both pointers
// are dereferenced unconditionally.
template <typename T>
static int PointerSpaceship(const T* a, const T* b) {
  const T& left = *a;
  const T& right = *b;
  return Spaceship<T>(left, right);
}
// Returns the smallest power of two which is >= x. If you pass in a
// number that is already a power of two, it is returned as is.
uint32_t RoundUpToPowerOf2(uint32_t x);
@ -283,6 +300,15 @@ class Vector {
return Vector<T>(NewArray<T>(length), length);
}
// Returns a vector using the same backing storage as this one,
// spanning from and including 'from', to but not including 'to'.
// The range must be non-empty and lie inside this vector.
Vector<T> SubVector(int from, int to) {
  // Together these give 0 <= from < to <= length_, which also implies
  // from < length_. The lower bound on 'from' guards against a negative
  // index placing the result in front of the backing store.
  ASSERT(0 <= from);
  ASSERT(from < to);
  ASSERT(to <= length_);
  return Vector<T>(start() + from, to - from);
}
// Returns the length of the vector.
int length() const { return length_; }
@ -298,6 +324,10 @@ class Vector {
return start_[index];
}
// Returns a reference to the element at index 0; no emptiness check is made.
T& first() { return *start_; }
// Returns a reference to the element at index length_ - 1; no emptiness
// check is made.
T& last() { return *(start_ + (length_ - 1)); }
// Returns a clone of this vector with a new backing store.
Vector<T> Clone() const {
T* result = NewArray<T>(length_);
@ -305,6 +335,18 @@ class Vector {
return Vector<T>(result, length_);
}
// Sorts the elements in place with qsort, ordered by 'cmp'.
void Sort(int (*cmp)(const T*, const T*)) {
  typedef int (*QsortComparer)(const void*, const void*);
  // qsort takes an untyped comparer, so the typed one is passed through a
  // function-pointer cast.
  QsortComparer untyped_cmp = reinterpret_cast<QsortComparer>(cmp);
  qsort(start(), length(), sizeof(T), untyped_cmp);
}
void Sort() {
Sort(PointerSpaceship<T>);
}
// Releases the array underlying this vector. Once disposed the
// vector is empty.
void Dispose() {
@ -465,6 +507,58 @@ static inline void CopyChars(sinkchar* dest, const sourcechar* src, int chars) {
}
}
// Reads a 16-bit value starting at pc.
// With CAN_READ_UNALIGNED the two bytes are read as one uint16_t in the
// host's byte order; otherwise pc[0] is taken as the high byte. Values must
// therefore be written with Store16 built the same way.
static inline int Load16(const byte* pc) {
#ifdef CAN_READ_UNALIGNED
  const uint16_t* unaligned = reinterpret_cast<const uint16_t*>(pc);
  return *unaligned;
#else
  return (pc[0] << 8) | pc[1];
#endif
}
// Reads a 32-bit value starting at pc.
// With CAN_READ_UNALIGNED the four bytes are read as one uint32_t in the
// host's byte order; otherwise pc[0] is taken as the most significant byte.
// Values must therefore be written with Store32 built the same way.
static inline int Load32(const byte* pc) {
#ifdef CAN_READ_UNALIGNED
  const uint32_t* unaligned = reinterpret_cast<const uint32_t*>(pc);
  return *unaligned;
#else
  uint32_t word = static_cast<uint32_t>(pc[0]) << 24;
  word |= static_cast<uint32_t>(pc[1]) << 16;
  word |= static_cast<uint32_t>(pc[2]) << 8;
  word |= pc[3];
  return word;
#endif
}
// Writes a 16-bit value starting at pc.
// With CAN_READ_UNALIGNED the value is stored as one uint16_t in the host's
// byte order; otherwise the high byte goes to pc[0] and the low byte to
// pc[1].
static inline void Store16(byte* pc, uint16_t value) {
#ifdef CAN_READ_UNALIGNED
  uint16_t* unaligned = reinterpret_cast<uint16_t*>(pc);
  *unaligned = value;
#else
  pc[0] = static_cast<byte>(value >> 8);
  pc[1] = static_cast<byte>(value);
#endif
}
// Writes a 32-bit value starting at pc.
// With CAN_READ_UNALIGNED the value is stored as one uint32_t in the host's
// byte order; otherwise the most significant byte goes to pc[0] and the
// least significant byte to pc[3].
static inline void Store32(byte* pc, uint32_t value) {
#ifdef CAN_READ_UNALIGNED
  uint32_t* unaligned = reinterpret_cast<uint32_t*>(pc);
  *unaligned = value;
#else
  pc[0] = static_cast<byte>(value >> 24);
  pc[1] = static_cast<byte>(value >> 16);
  pc[2] = static_cast<byte>(value >> 8);
  pc[3] = static_cast<byte>(value);
#endif
}
} } // namespace v8::internal
#endif // V8_UTILS_H_

View File

@ -38,7 +38,7 @@ SOURCES = {
'test-ast.cc', 'test-heap.cc', 'test-utils.cc', 'test-compiler.cc',
'test-spaces.cc', 'test-mark-compact.cc', 'test-lock.cc',
'test-conversions.cc', 'test-strings.cc', 'test-serialize.cc',
'test-decls.cc', 'test-alloc.cc'
'test-decls.cc', 'test-alloc.cc', 'test-regexp.cc'
],
'arch:arm': ['test-assembler-arm.cc', 'test-disasm-arm.cc'],
'arch:ia32': ['test-assembler-ia32.cc', 'test-disasm-ia32.cc'],

922
test/cctest/test-regexp.cc Normal file
View File

@ -0,0 +1,922 @@
// Copyright 2006-2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdlib.h>
#include <set>
#include "v8.h"
#include "cctest.h"
#include "zone-inl.h"
#include "parser.h"
#include "ast.h"
#include "jsregexp-inl.h"
#include "assembler-irregexp.h"
#include "regexp-macro-assembler.h"
#include "regexp-macro-assembler-irregexp.h"
#include "regexp-macro-assembler-ia32.h"
#include "interpreter-irregexp.h"
using namespace v8::internal;
// Parses 'input' as a regexp, CHECKs that parsing succeeded with a tree and
// no error, and returns the tree's ToString() output.
static SmartPointer<const char> Parse(const char* input) {
  v8::HandleScope handle_scope;
  ZoneScope zone(DELETE_ON_EXIT);
  FlatStringReader pattern_reader(CStrVector(input));
  RegExpParseResult parsed;
  CHECK(v8::internal::ParseRegExp(&pattern_reader, &parsed));
  CHECK(parsed.tree != NULL);
  CHECK(parsed.error.is_null());
  SmartPointer<const char> printed_tree = parsed.tree->ToString();
  return printed_tree;
}
// Parses 'input' as a regexp, CHECKs that parsing succeeded with a tree and
// no error, and returns the has_character_escapes flag of the parse result.
static bool ParseEscapes(const char* input) {
  v8::HandleScope scope;
  ZoneScope zone_scope(DELETE_ON_EXIT);
  // The parser reads the pattern through this reader; no other input buffer
  // is needed.
  FlatStringReader reader(CStrVector(input));
  RegExpParseResult result;
  CHECK(v8::internal::ParseRegExp(&reader, &result));
  CHECK(result.tree != NULL);
  CHECK(result.error.is_null());
  return result.has_character_escapes;
}
// Checks that the printed parse tree of 'input' equals 'expected'.
#define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input))
// Checks the has_character_escapes flag produced by parsing 'input'.
// The definition deliberately has no trailing semicolon: the call site
// supplies it, which keeps the macro usable as a single statement (for
// example in an unbraced if/else).
#define CHECK_ESCAPES(input, has_escapes) \
  CHECK_EQ(has_escapes, ParseEscapes(input))
// Parser test. Each CHECK_PARSE_EQ parses the pattern given as the first
// argument and compares the printed parse tree with the second argument;
// each CHECK_ESCAPES compares the parse result's has_character_escapes flag
// with the second argument.
TEST(Parser) {
V8::Initialize(NULL);
// Literals, disjunctions, assertions and character-class escapes.
CHECK_PARSE_EQ("abc", "'abc'");
CHECK_PARSE_EQ("", "%");
CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')");
CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)");
CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
CHECK_PARSE_EQ("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
// Quantifiers, with and without a trailing '?'.
CHECK_PARSE_EQ("a*", "(# 0 - g 'a')");
CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')");
CHECK_PARSE_EQ("abc+", "(: 'ab' (# 1 - g 'c'))");
CHECK_PARSE_EQ("abc+?", "(: 'ab' (# 1 - n 'c'))");
CHECK_PARSE_EQ("xyz?", "(: 'xy' (# 0 1 g 'z'))");
CHECK_PARSE_EQ("xyz??", "(: 'xy' (# 0 1 n 'z'))");
CHECK_PARSE_EQ("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
CHECK_PARSE_EQ("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
CHECK_PARSE_EQ("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
CHECK_PARSE_EQ("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
CHECK_PARSE_EQ("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
CHECK_PARSE_EQ("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
CHECK_PARSE_EQ("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
CHECK_PARSE_EQ("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
// Control escapes, groups and lookaheads.
CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
CHECK_PARSE_EQ("(?:foo)", "'foo'");
CHECK_PARSE_EQ("(?: foo )", "' foo '");
CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
CHECK_PARSE_EQ("()", "(^ %)");
CHECK_PARSE_EQ("(?=)", "(-> + %)");
// Character classes.
CHECK_PARSE_EQ("[]", "^[\\x00-\\uffff]"); // Doesn't compile on windows
CHECK_PARSE_EQ("[^]", "[\\x00-\\uffff]"); // \uffff isn't in codepage 1252
CHECK_PARSE_EQ("[x]", "[x]");
CHECK_PARSE_EQ("[xyz]", "[x y z]");
CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
CHECK_PARSE_EQ("[-123]", "[- 1 2 3]");
CHECK_PARSE_EQ("[^123]", "^[1 2 3]");
CHECK_PARSE_EQ("]", "']'");
CHECK_PARSE_EQ("}", "'}'");
CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]");
CHECK_PARSE_EQ("[\\d]", "[0-9]");
CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]");
CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]");
CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]");
// \c control letters and escaped syntax characters.
CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK",
"'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
CHECK_PARSE_EQ("\\c!", "'c!'");
CHECK_PARSE_EQ("\\c_", "'c_'");
CHECK_PARSE_EQ("\\c~", "'c~'");
CHECK_PARSE_EQ("[a\\]c]", "[a ] c]");
CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");
// Numeric escapes outside character classes, including back references.
CHECK_PARSE_EQ("\\0", "'\\x00'");
CHECK_PARSE_EQ("\\8", "'8'");
CHECK_PARSE_EQ("\\9", "'9'");
CHECK_PARSE_EQ("\\11", "'\\x09'");
CHECK_PARSE_EQ("\\11a", "'\\x09a'");
CHECK_PARSE_EQ("\\011", "'\\x09'");
CHECK_PARSE_EQ("\\00011", "'\\x0011'");
CHECK_PARSE_EQ("\\118", "'\\x098'");
CHECK_PARSE_EQ("\\111", "'I'");
CHECK_PARSE_EQ("\\1111", "'I1'");
CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')"
" (# 0 - g (<- 1)))");
CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')"
" (# 0 - g (<- 2)))");
CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')"
" (# 0 - g (<- 3)))");
CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')"
" (# 0 - g '\\x04'))");
CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
"(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
" (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
"(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
" (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))");
CHECK_PARSE_EQ("(a\\1)", "(^ 'a')");
CHECK_PARSE_EQ("(\\1a)", "(^ 'a')");
CHECK_PARSE_EQ("\\1(a)", "(^ 'a')");
CHECK_PARSE_EQ("(?!(a))\\1", "(-> - (^ 'a'))");
CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(-> - (: (^ 'a') (<- 1)))");
// Numeric escapes inside character classes.
CHECK_PARSE_EQ("[\\0]", "[\\x00]");
CHECK_PARSE_EQ("[\\11]", "[\\x09]");
CHECK_PARSE_EQ("[\\11a]", "[\\x09 a]");
CHECK_PARSE_EQ("[\\011]", "[\\x09]");
CHECK_PARSE_EQ("[\\00011]", "[\\x00 1 1]");
CHECK_PARSE_EQ("[\\118]", "[\\x09 8]");
CHECK_PARSE_EQ("[\\111]", "[I]");
CHECK_PARSE_EQ("[\\1111]", "[I 1]");
// \x and \u escapes, well-formed and malformed.
CHECK_PARSE_EQ("\\x34", "'\x34'");
CHECK_PARSE_EQ("\\x60", "'\x60'");
CHECK_PARSE_EQ("\\x3z", "'x3z'");
CHECK_PARSE_EQ("\\u0034", "'\x34'");
CHECK_PARSE_EQ("\\u003z", "'u003z'");
CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))");
// has_character_escapes flag.
CHECK_ESCAPES("a", false);
CHECK_ESCAPES("a|b", false);
CHECK_ESCAPES("a\\n", true);
CHECK_ESCAPES("^a", false);
CHECK_ESCAPES("a$", false);
CHECK_ESCAPES("a\\b!", false);
CHECK_ESCAPES("a\\Bb", false);
CHECK_ESCAPES("a*", false);
CHECK_ESCAPES("a*?", false);
CHECK_ESCAPES("a?", false);
CHECK_ESCAPES("a??", false);
CHECK_ESCAPES("a{0,1}?", false);
CHECK_ESCAPES("a{1,1}?", false);
CHECK_ESCAPES("a{1,2}?", false);
CHECK_ESCAPES("a+?", false);
CHECK_ESCAPES("(a)", false);
CHECK_ESCAPES("(a)\\1", false);
CHECK_ESCAPES("(\\1a)", false);
CHECK_ESCAPES("\\1(a)", false);
CHECK_ESCAPES("a\\s", false);
CHECK_ESCAPES("a\\S", false);
CHECK_ESCAPES("a\\d", false);
CHECK_ESCAPES("a\\D", false);
CHECK_ESCAPES("a\\w", false);
CHECK_ESCAPES("a\\W", false);
CHECK_ESCAPES("a.", false);
CHECK_ESCAPES("a\\q", true);
CHECK_ESCAPES("a[a]", false);
CHECK_ESCAPES("a[^a]", false);
CHECK_ESCAPES("a[a-z]", false);
CHECK_ESCAPES("a[\\q]", false);
CHECK_ESCAPES("a(?:b)", false);
CHECK_ESCAPES("a(?=b)", false);
CHECK_ESCAPES("a(?!b)", false);
CHECK_ESCAPES("\\x60", true);
CHECK_ESCAPES("\\u0060", true);
CHECK_ESCAPES("\\cA", true);
CHECK_ESCAPES("\\q", true);
CHECK_ESCAPES("\\1112", true);
CHECK_ESCAPES("\\0", true);
CHECK_ESCAPES("(a)\\1", false);
// Brace sequences that do not form a quantifier are expected to print as
// literal text.
CHECK_PARSE_EQ("a{}", "'a{}'");
CHECK_PARSE_EQ("a{,}", "'a{,}'");
CHECK_PARSE_EQ("a{", "'a{'");
CHECK_PARSE_EQ("a{z}", "'a{z}'");
CHECK_PARSE_EQ("a{1z}", "'a{1z}'");
CHECK_PARSE_EQ("a{12z}", "'a{12z}'");
CHECK_PARSE_EQ("a{12,", "'a{12,'");
CHECK_PARSE_EQ("a{12,3b", "'a{12,3b'");
CHECK_PARSE_EQ("{}", "'{}'");
CHECK_PARSE_EQ("{,}", "'{,}'");
CHECK_PARSE_EQ("{", "'{'");
CHECK_PARSE_EQ("{z}", "'{z}'");
CHECK_PARSE_EQ("{1z}", "'{1z}'");
CHECK_PARSE_EQ("{12z}", "'{12z}'");
CHECK_PARSE_EQ("{12,", "'{12,'");
CHECK_PARSE_EQ("{12,3b", "'{12,3b'");
}
// Additional pattern/expected-tree pairs, checked the same way as in
// TEST(Parser).
// NOTE(review): unlike TEST(Parser) and TEST(Errors), this test does not call
// V8::Initialize(NULL) itself -- confirm that is intentional.
TEST(ParserRegression) {
CHECK_PARSE_EQ("[A-Z$-][x]", "(! [A-Z $ -] [x])");
CHECK_PARSE_EQ("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
CHECK_PARSE_EQ("{", "'{'");
CHECK_PARSE_EQ("a|", "(| 'a' %)");
}
static void ExpectError(const char* input,
const char* expected) {
v8::HandleScope scope;
ZoneScope zone_scope(DELETE_ON_EXIT);
FlatStringReader reader(CStrVector(input));
RegExpParseResult result;
CHECK_EQ(false, v8::internal::ParseRegExp(&reader, &result));
CHECK(result.tree == NULL);
CHECK(!result.error.is_null());
SmartPointer<char> str = result.error->ToCString(ALLOW_NULLS);
CHECK_EQ(expected, *str);
}
// Feeds a series of malformed patterns to the parser and checks the error
// message reported for each one.
TEST(Errors) {
  V8::Initialize(NULL);

  // Expected messages, gathered up front.
  const char* const kEndBackslash = "\\ at end of pattern";
  const char* const kUnterminatedGroup = "Unterminated group";
  const char* const kInvalidGroup = "Invalid group";
  const char* const kUnterminatedCharacterClass =
      "Unterminated character class";
  const char* const kIllegalCharacterClass = "Illegal character class";
  const char* const kEndControl = "\\c at end of pattern";
  const char* const kNothingToRepeat = "Nothing to repeat";

  // Escapes and groups.
  ExpectError("\\", kEndBackslash);
  ExpectError("(foo", kUnterminatedGroup);
  ExpectError("(?", kInvalidGroup);

  // Character classes.
  ExpectError("[", kUnterminatedCharacterClass);
  ExpectError("[a-", kUnterminatedCharacterClass);
  ExpectError("[a-\\w]", kIllegalCharacterClass);

  // Control escape with nothing after it.
  ExpectError("\\c", kEndControl);

  // Quantifiers with no preceding atom.
  ExpectError("*", kNothingToRepeat);
  ExpectError("?", kNothingToRepeat);
  ExpectError("+", kNothingToRepeat);
  ExpectError("{1}", kNothingToRepeat);
  ExpectError("{1,2}", kNothingToRepeat);
  ExpectError("{1,}", kNothingToRepeat);
}
// Reference predicate for \d: true for the ASCII digits '0'..'9'.
static bool IsDigit(uc16 c) {
  if (c < '0') return false;
  return c <= '9';
}
// Reference predicate for \D: the complement of IsDigit.
static bool NotDigit(uc16 c) {
  const bool digit = IsDigit(c);
  return digit == false;
}
// Reference predicate for \s: a fixed list of code units is accepted
// directly; everything else is delegated to unibrow::Space.
static bool IsWhiteSpace(uc16 c) {
  const bool explicitly_listed =
      (0x09 <= c && c <= 0x0D) ||  // 0x09, 0x0A, 0x0B, 0x0C, 0x0D
      c == 0x20 ||
      c == 0xA0 ||
      c == 0x2028 ||
      c == 0x2029;
  if (explicitly_listed) return true;
  return unibrow::Space::Is(c);
}
// Reference predicate for \S: the complement of IsWhiteSpace.
static bool NotWhiteSpace(uc16 c) {
  const bool space = IsWhiteSpace(c);
  return space == false;
}
// Reference predicate for \w: ASCII letters, ASCII digits and '_'.
static bool IsWord(uc16 c) {
  if ('a' <= c && c <= 'z') return true;
  if ('A' <= c && c <= 'Z') return true;
  if ('0' <= c && c <= '9') return true;
  return c == '_';
}
// Reference predicate for \W: the complement of IsWord.
static bool NotWord(uc16 c) {
  const bool word = IsWord(c);
  return word == false;
}
// Reference predicate for '.': every code unit except the four line
// terminators listed below.
static bool Dot(uc16 c) {
  const bool line_terminator =
      c == 0x000A ||  // LF
      c == 0x000D ||  // CR
      c == 0x2028 ||  // LS
      c == 0x2029;    // PS
  return !line_terminator;
}
// Expands the class escape |c| into character ranges and checks, for every
// 16-bit code unit, that membership in those ranges agrees with |pred|.
static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
  ZoneScope zone_scope(DELETE_ON_EXIT);
  ZoneList<CharacterRange>* class_ranges = new ZoneList<CharacterRange>(2);
  CharacterRange::AddClassEscape(c, class_ranges);
  const unsigned kCodeUnitCount = 1 << 16;
  for (unsigned code = 0; code < kCodeUnitCount; code++) {
    // Linear scan; stop at the first range that covers |code|.
    bool covered = false;
    for (int index = 0; index < class_ranges->length(); index++) {
      CharacterRange& candidate = class_ranges->at(index);
      if (candidate.from() <= code && code <= candidate.to()) {
        covered = true;
        break;
      }
    }
    CHECK_EQ(pred(code), covered);
  }
}
// Runs TestCharacterClassEscapes for '.', and for each of the d/D, s/S and
// w/W escapes, against the matching reference predicate.
TEST(CharacterClassEscapes) {
  struct EscapeCase {
    uc16 escape;
    bool (*reference)(uc16);
  };
  static const EscapeCase kCases[] = {
    { '.', Dot },
    { 'd', IsDigit },
    { 'D', NotDigit },
    { 's', IsWhiteSpace },
    { 'S', NotWhiteSpace },
    { 'w', IsWord },
    { 'W', NotWord }
  };
  const int case_count = static_cast<int>(sizeof(kCases) / sizeof(kCases[0]));
  for (int n = 0; n < case_count; n++) {
    TestCharacterClassEscapes(kCases[n].escape, kCases[n].reference);
  }
}
// Parses |input| and hands the parse result to RegExpEngine::Compile.
// Returns NULL when parsing fails; otherwise returns whatever node the
// engine stored (which stays NULL if the engine stores nothing).
static RegExpNode* Compile(const char* input) {
  FlatStringReader pattern_reader(CStrVector(input));
  RegExpParseResult parsed;
  RegExpNode* graph = NULL;
  if (v8::internal::ParseRegExp(&pattern_reader, &parsed)) {
    RegExpEngine::Compile(&parsed, &graph, false);
  }
  return graph;
}
static void Execute(const char* input,
const char* str,
bool dot_output = false) {
v8::HandleScope scope;
ZoneScope zone_scope(DELETE_ON_EXIT);
RegExpNode* node = Compile(input);
USE(node);
#ifdef DEBUG
if (dot_output) {
RegExpEngine::DotPrint(input, node);
exit(0);
}
#endif // DEBUG
}
// Pushes one pattern through Execute with three different subject strings.
TEST(Execution) {
  V8::Initialize(NULL);
  static const char* const kPattern = ".*?(?:a[bc]d|e[fg]h)";
  static const char* const kSubjects[] = {
    "xxxabbegh",
    "xxxabbefh",
    "xxxabbefd"
  };
  const int subject_count =
      static_cast<int>(sizeof(kSubjects) / sizeof(kSubjects[0]));
  for (int n = 0; n < subject_count; n++) {
    Execute(kPattern, kSubjects[n]);
  }
}
// Configuration used to instantiate ZoneSplayTree in the tests below:
// int keys, int values, and a three-way integer comparison.
class TestConfig {
 public:
  typedef int Key;
  typedef int Value;
  static const int kNoKey;
  static const int kNoValue;
  // Returns -1, 0 or 1 depending on whether a is less than, equal to or
  // greater than b.
  static inline int Compare(int a, int b) {
    if (a == b) return 0;
    return (a < b) ? -1 : 1;
  }
};

const int TestConfig::kNoKey = 0;
const int TestConfig::kNoValue = 0;
// Deterministic mixing function used to generate test data: XOR of the two
// arguments after scaling each by a fixed multiplier.
static int PseudoRandom(int i, int j) {
  const int scaled_i = i * 781;
  const int scaled_j = j * 329;
  // Complementing twice leaves the XOR unchanged.
  const int flipped = ~(scaled_i ^ scaled_j);
  return ~flipped;
}
// Exercises ZoneSplayTree against a std::set used as a reference model.
// Pseudo-random keys are alternately inserted and removed; after every
// mutation the tree and the set must agree on membership for every key in
// [0, kLimit). The nearest-key lookups are then checked against the set.
TEST(SplayTreeSimple) {
  static const int kLimit = 1000;
  ZoneScope zone_scope(DELETE_ON_EXIT);
  ZoneSplayTree<TestConfig> tree;
  std::set<int> seen;
  // Expects a Locator named |loc| to be in scope at the point of use.
#define CHECK_MAPS_EQUAL() do {                                      \
    for (int k = 0; k < kLimit; k++)                                 \
      CHECK_EQ(seen.find(k) != seen.end(), tree.Find(k, &loc));      \
  } while (false)
  for (int i = 0; i < 50; i++) {
    for (int j = 0; j < 50; j++) {
      int next = PseudoRandom(i, j) % kLimit;
      if (seen.find(next) != seen.end()) {
        // We've already seen this one.  Check the value and remove
        // it.
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(tree.Find(next, &loc));
        CHECK_EQ(next, loc.key());
        CHECK_EQ(3 * next, loc.value());
        tree.Remove(next);
        seen.erase(next);
        CHECK_MAPS_EQUAL();
      } else {
        // Check that it wasn't there already and then add it.
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(!tree.Find(next, &loc));
        CHECK(tree.Insert(next, &loc));
        CHECK_EQ(next, loc.key());
        loc.set_value(3 * next);
        seen.insert(next);
        CHECK_MAPS_EQUAL();
      }
      // Walk downwards from |val| to the nearest key present in the
      // reference set; the tree must report that same key.  (The loop
      // previously tested |val| on every iteration, so only an exact hit
      // was ever verified.)
      int val = PseudoRandom(j, i) % kLimit;
      for (int k = val; k >= 0; k--) {
        if (seen.find(k) != seen.end()) {
          ZoneSplayTree<TestConfig>::Locator loc;
          CHECK(tree.FindGreatestLessThan(val, &loc));
          CHECK_EQ(loc.key(), k);
          break;
        }
      }
      // Same check in the upward direction.  |val| may be negative here
      // because i - j can be; the scan then simply starts below zero.
      val = PseudoRandom(i + j, i - j) % kLimit;
      for (int k = val; k < kLimit; k++) {
        if (seen.find(k) != seen.end()) {
          ZoneSplayTree<TestConfig>::Locator loc;
          CHECK(tree.FindLeastGreaterThan(val, &loc));
          CHECK_EQ(loc.key(), k);
          break;
        }
      }
    }
  }
#undef CHECK_MAPS_EQUAL
}
// Builds kRangeCount sets of sorted pseudo-random range bounds, registers
// every [from, to] pair in a DispatchTable under the index of its set, and
// then verifies for each value below kLimit that the table's OutSet
// contains exactly the sets with a range covering that value.
TEST(DispatchTableConstruction) {
  static const int kLimit = 1000;
  static const int kRangeCount = 8;
  static const int kRangeSize = 16;
  static const int kBoundCount = 2 * kRangeSize;

  // Generate the test data: each row holds kRangeSize (from, to) pairs.
  uc16 ranges[kRangeCount][kBoundCount];
  for (int set = 0; set < kRangeCount; set++) {
    Vector<uc16> bounds(ranges[set], kBoundCount);
    for (int n = 0; n < kBoundCount; n++) {
      bounds[n] = PseudoRandom(set + 25, n + 87) % kLimit;
    }
    bounds.Sort();
    for (int n = 0; n + 1 < kBoundCount; n++) {
      CHECK(bounds[n] <= bounds[n + 1]);
    }
  }

  // Register every pair in the dispatch table.
  ZoneScope zone_scope(DELETE_ON_EXIT);
  DispatchTable table;
  for (int set = 0; set < kRangeCount; set++) {
    uc16* bounds = ranges[set];
    for (int n = 0; n < kBoundCount; n += 2) {
      table.AddRange(CharacterRange(bounds[n], bounds[n + 1]), set);
    }
  }

  // Compare the table against a brute-force membership computation.
  for (int p = 0; p < kLimit; p++) {
    OutSet* outs = table.Get(p);
    for (int set = 0; set < kRangeCount; set++) {
      uc16* bounds = ranges[set];
      bool expected_on = false;
      for (int n = 0; n < kBoundCount; n += 2) {
        if (bounds[n] <= p && p <= bounds[n + 1]) {
          expected_on = true;
          break;
        }
      }
      CHECK_EQ(expected_on, outs->Get(set));
    }
  }
}
// Hand-assembles a small Irregexp bytecode program that scans a subject
// for the characters 'f', 'o', 'o' at consecutive offsets, then runs it
// through IrregexpInterpreter::Match on three subjects and checks the
// match result and the two recorded capture positions.
TEST(Assembler) {
V8::Initialize(NULL);
byte codes[1024];
IrregexpAssembler assembler(Vector<byte>(codes, 1024));
// Shorthand so that each emitted instruction reads like a line of assembly.
#define __ assembler.
Label advance;
Label look_for_foo;
Label fail;
__ GoTo(&look_for_foo);
// Reached when one of the character checks below does not match.
__ Bind(&advance);
__ AdvanceCP(1);
__ Bind(&look_for_foo);
// NOTE(review): the label passed to LoadCurrentChar is presumably taken
// when the requested offset is past the end of the subject -- confirm.
__ LoadCurrentChar(0, &fail);
__ CheckNotCharacter('f', &advance);
__ LoadCurrentChar(1, &fail);
__ CheckNotCharacter('o', &advance);
__ LoadCurrentChar(2, &fail);
__ CheckNotCharacter('o', &advance);
// Judging by the expected captures below, register 0 receives the current
// position and register 1 the current position plus 2.
__ WriteCurrentPositionToRegister(0);
__ WriteCurrentPositionToRegister(1, 2);
__ Succeed();
__ Bind(&fail);
__ Fail();
v8::HandleScope scope;
// Copy the assembled bytes into a heap ByteArray for the interpreter.
Handle<ByteArray> array = Factory::NewByteArray(assembler.length());
assembler.Copy(array->GetDataStartAddress());
int captures[2];
// Subject without "foo": no match expected.
Handle<String> f1 =
Factory::NewStringFromAscii(CStrVector("Now is the time"));
Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1);
CHECK(!IrregexpInterpreter::Match(array, f1_16, captures, 0));
// "foo" at the very start: captures 0 and 2.
Handle<String> f2 = Factory::NewStringFromAscii(CStrVector("foo bar baz"));
Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2);
CHECK(IrregexpInterpreter::Match(array, f2_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(2, captures[1]);
// "foo" starting at index 3: captures 3 and 5.
Handle<String> f3 = Factory::NewStringFromAscii(CStrVector("tomfoolery"));
Handle<String> f3_16 = RegExpImpl::StringToTwoByte(f3);
CHECK(IrregexpInterpreter::Match(array, f3_16, captures, 0));
CHECK_EQ(3, captures[0]);
CHECK_EQ(5, captures[1]);
}
// Hand-assembles an Irregexp bytecode program for the pattern named in the
// comment below, using the backtrack stack explicitly, and checks the
// interpreter's result and captures on five subjects.
TEST(Assembler2) {
V8::Initialize(NULL);
byte codes[1024];
IrregexpAssembler assembler(Vector<byte>(codes, 1024));
// Shorthand so that each emitted instruction reads like a line of assembly.
#define __ assembler.
// /^.*foo/
Label more_dots;
Label unwind_dot;
Label failure;
Label foo;
Label foo_failed;
Label dot_match;
// ^
// Save the current position and register 0, record the start position in
// register 0, and push |failure| as the outermost backtrack target.
__ PushCurrentPosition();
__ PushRegister(0);
__ WriteCurrentPositionToRegister(0);
__ PushBacktrack(&failure);
__ GoTo(&dot_match);
// .*
__ Bind(&more_dots);
__ AdvanceCP(1);
__ Bind(&dot_match);
// Each consumed position leaves a (position, unwind_dot) pair on the stack.
__ PushCurrentPosition();
__ PushBacktrack(&unwind_dot);
__ LoadCurrentChar(0, &foo);
__ CheckNotCharacter('\n', &more_dots);
// foo
__ Bind(&foo);
__ CheckNotCharacter('f', &foo_failed);
__ LoadCurrentChar(1, &foo_failed);
__ CheckNotCharacter('o', &foo_failed);
__ LoadCurrentChar(2, &foo_failed);
__ CheckNotCharacter('o', &foo_failed);
__ WriteCurrentPositionToRegister(1, 2);
__ Succeed();
// NOTE(review): the Break() instructions follow Succeed()/PopBacktrack();
// presumably they guard against falling through -- confirm.
__ Break();
__ Bind(&foo_failed);
__ PopBacktrack();
__ Break();
__ Bind(&unwind_dot);
__ PopCurrentPosition();
__ LoadCurrentChar(0, &foo_failed);
__ GoTo(&foo);
// Outermost backtrack target: undo the pushes made for '^' and fail.
__ Bind(&failure);
__ PopRegister(0);
__ PopCurrentPosition();
__ Fail();
v8::HandleScope scope;
// Copy the assembled bytes into a heap ByteArray for the interpreter.
Handle<ByteArray> array = Factory::NewByteArray(assembler.length());
assembler.Copy(array->GetDataStartAddress());
int captures[2];
// Subject without "foo": no match expected.
Handle<String> f1 =
Factory::NewStringFromAscii(CStrVector("Now is the time"));
Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1);
CHECK(!IrregexpInterpreter::Match(array, f1_16, captures, 0));
Handle<String> f2 = Factory::NewStringFromAscii(CStrVector("foo bar baz"));
Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2);
CHECK(IrregexpInterpreter::Match(array, f2_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(2, captures[1]);
Handle<String> f3 = Factory::NewStringFromAscii(CStrVector("tomfoolery"));
Handle<String> f3_16 = RegExpImpl::StringToTwoByte(f3);
CHECK(IrregexpInterpreter::Match(array, f3_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(5, captures[1]);
// Two occurrences of "foo": the expected end (14) belongs to the later one.
Handle<String> f4 =
Factory::NewStringFromAscii(CStrVector("football buffoonery"));
Handle<String> f4_16 = RegExpImpl::StringToTwoByte(f4);
CHECK(IrregexpInterpreter::Match(array, f4_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(14, captures[1]);
// "foo" appears only after a newline: no match expected.
Handle<String> f5 =
Factory::NewStringFromAscii(CStrVector("walking\nbarefoot"));
Handle<String> f5_16 = RegExpImpl::StringToTwoByte(f5);
CHECK(!IrregexpInterpreter::Match(array, f5_16, captures, 0));
}
// Drives the same bytecode assembler through the RegExpMacroAssemblerIrregexp
// front end: emits a program for the pattern named in the comment below,
// plus some register arithmetic, and checks the interpreter's captures for a
// matching and a non-matching subject.
TEST(MacroAssembler) {
V8::Initialize(NULL);
byte codes[1024];
IrregexpAssembler assembler(Vector<byte>(codes, 1024));
RegExpMacroAssemblerIrregexp m(&assembler);
// ^f(o)o.
Label fail, fail2, start;
uc16 foo_chars[3];
foo_chars[0] = 'f';
foo_chars[1] = 'o';
foo_chars[2] = 'o';
Vector<const uc16> foo(foo_chars, 3);
// Register 4 is set to 42, pushed, then advanced by 42; the success case
// below expects 84 in captures[4].
m.SetRegister(4, 42);
m.PushRegister(4);
m.AdvanceRegister(4, 42);
m.GoTo(&start);
// Jumped over by the GoTo above.
m.Fail();
m.Bind(&start);
m.PushBacktrack(&fail2);
m.CheckCharacters(foo, 0, &fail);
// Registers 0..3 receive the positions checked below as 0, 3, 1 and 2.
m.WriteCurrentPositionToRegister(0);
m.PushCurrentPosition();
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1);
m.PopCurrentPosition();
m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(2);
m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(3);
m.Succeed();
m.Bind(&fail);
m.Backtrack();
// Placed directly after Backtrack(); NOTE(review): presumably unreachable --
// confirm.
m.Succeed();
m.Bind(&fail2);
// Pops the value pushed from register 4 into register 0; the failure case
// below expects 42 in captures[0].
m.PopRegister(0);
m.Fail();
v8::HandleScope scope;
// Copy the assembled bytes into a heap ByteArray for the interpreter.
Handle<ByteArray> array = Factory::NewByteArray(assembler.length());
assembler.Copy(array->GetDataStartAddress());
int captures[5];
Handle<String> f1 =
Factory::NewStringFromAscii(CStrVector("foobar"));
Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1);
CHECK(IrregexpInterpreter::Match(array, f1_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(3, captures[1]);
CHECK_EQ(1, captures[2]);
CHECK_EQ(2, captures[3]);
CHECK_EQ(84, captures[4]);
Handle<String> f2 =
Factory::NewStringFromAscii(CStrVector("barfoo"));
Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2);
CHECK(!IrregexpInterpreter::Match(array, f2_16, captures, 0));
CHECK_EQ(42, captures[0]);
}
// Checks DispatchTableConstructor::AddInverse: after adding the inverse of
// a list of ranges under choice index 0, a value must have choice 0 set in
// the table exactly when no range in the list contains it.
TEST(AddInverseToTable) {
  static const int kLimit = 1000;
  static const int kRangeCount = 16;

  // Ten rounds with different pseudo-random range lists.
  for (int round = 0; round < 10; round++) {
    ZoneScope zone_scope(DELETE_ON_EXIT);
    ZoneList<CharacterRange>* ranges =
        new ZoneList<CharacterRange>(kRangeCount);
    for (int n = 0; n < kRangeCount; n++) {
      int from = PseudoRandom(round + 87, n + 25) % kLimit;
      int width = PseudoRandom(n + 87, round + 25) % (kLimit / 20);
      int to = from + width;
      if (to > kLimit) to = kLimit;
      ranges->Add(CharacterRange(from, to));
    }
    DispatchTable table;
    DispatchTableConstructor cons(&table);
    cons.set_choice_index(0);
    cons.AddInverse(ranges);
    for (int value = 0; value < kLimit; value++) {
      bool inside_some_range = false;
      for (int n = 0; n < kRangeCount; n++) {
        if (ranges->at(n).Contains(value)) {
          inside_some_range = true;
          break;
        }
      }
      OutSet* set = table.Get(value);
      CHECK_EQ(inside_some_range, set->Get(0) == false);
    }
  }

  // Single range ending one short of 0xFFFF: 0xFFFE lies inside the range,
  // 0xFFFF lies in its inverse.
  ZoneScope zone_scope(DELETE_ON_EXIT);
  ZoneList<CharacterRange>* ranges =
      new ZoneList<CharacterRange>(1);
  ranges->Add(CharacterRange(0xFFF0, 0xFFFE));
  DispatchTable table;
  DispatchTableConstructor cons(&table);
  cons.set_choice_index(0);
  cons.AddInverse(ranges);
  CHECK(!table.Get(0xFFFE)->Get(0));
  CHECK(table.Get(0xFFFF)->Get(0));
}
// Runs |c| through unibrow::Ecma262Canonicalize. When the converter
// produces no output, |c| itself is returned; otherwise exactly one output
// character is required and returned.
static uc32 canonicalize(uc32 c) {
  unibrow::uchar mapped[unibrow::Ecma262Canonicalize::kMaxWidth];
  const int produced =
      unibrow::Ecma262Canonicalize::Convert(c, '\0', mapped, NULL);
  if (produced == 0) return c;
  CHECK_EQ(1, produced);
  return mapped[0];
}
// Checks canonicalization in three passes:
//  1. each ASCII lower-case letter canonicalizes like its upper-case
//     counterpart, and un-canonicalizing it yields {upper, lower};
//  2. no code point >= 128 canonicalizes to a value below 128;
//  3. canonicalize() agrees with ToUppercase, except that multi-character
//     results and mappings from >= 128 into < 128 leave the input as is.
TEST(LatinCanonicalize) {
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  for (char lower = 'a'; lower <= 'z'; lower++) {
    char upper = lower + ('A' - 'a');
    CHECK_EQ(canonicalize(lower), canonicalize(upper));
    unibrow::uchar variants[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    int variant_count = un_canonicalize.get(lower, '\0', variants);
    CHECK_EQ(2, variant_count);
    CHECK_EQ(upper, variants[0]);
    CHECK_EQ(lower, variants[1]);
  }

  for (uc32 c = 128; c < (1 << 21); c++) {
    CHECK_GE(canonicalize(c), 128);
  }

  unibrow::Mapping<unibrow::ToUppercase> to_upper;
  for (uc32 c = 0; c < (1 << 21); c++) {
    unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
    int length = to_upper.get(c, '\0', upper);
    if (length == 0) {
      // No mapping: treat the character as mapping to itself.
      length = 1;
      upper[0] = c;
    }
    const bool multi_char = length > 1;
    const bool maps_into_ascii = (c >= 128 && upper[0] < 128);
    uc32 expected = upper[0];
    if (multi_char || maps_into_ascii) {
      expected = c;
    }
    CHECK_EQ(expected, canonicalize(c));
  }
}
// Compiles a pattern with three alternatives, one of them anchored with
// '^', and checks that the resulting node's info has determine_start set.
TEST(SimplePropagation) {
  v8::HandleScope scope;
  ZoneScope zone_scope(DELETE_ON_EXIT);
  RegExpNode* node = Compile("(a|^b|c)");
  // Compile() returns NULL when the pattern fails to parse; report that as
  // a check failure instead of dereferencing a null pointer below.
  CHECK(node != NULL);
  CHECK(node->info()->determine_start);
}
// Runs |c| through unibrow::CanonicalizationRange. When the converter
// produces no output, |c| itself is returned; otherwise exactly one output
// value is required and returned.
static uc32 CanonRange(uc32 c) {
  unibrow::uchar mapped[unibrow::CanonicalizationRange::kMaxWidth];
  const int produced =
      unibrow::CanonicalizationRange::Convert(c, '\0', mapped, NULL);
  if (produced == 0) return c;
  CHECK_EQ(1, produced);
  return mapped[0];
}
// Cross-checks the CanonicalizationRange table against direct
// un-canonicalization.
//
// As used below, CanonRange(c) either carries kStartMarker (c starts a
// block and the payload is the block length) or is the distance from c
// back to the start of its block.
TEST(RangeCanonicalization) {
  ASSERT((CanonRange(0) & CharacterRange::kStartMarker) != 0);
  // Check that we arrive at the same result when using the basic
  // range canonicalization primitives as when using immediate
  // canonicalization.
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  for (int i = 0; i < CharacterRange::kRangeCanonicalizeMax; i++) {
    int range = CanonRange(i);
    int indirect_length = 0;
    unibrow::uchar indirect[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    if ((range & CharacterRange::kStartMarker) == 0) {
      // Not a block start: un-canonicalize the block start and shift the
      // result by this character's offset within the block.
      indirect_length = un_canonicalize.get(i - range, '\0', indirect);
      // Separate index name; this loop used to shadow the outer |i|.
      for (int j = 0; j < indirect_length; j++)
        indirect[j] += range;
    } else {
      indirect_length = un_canonicalize.get(i, '\0', indirect);
    }
    unibrow::uchar direct[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    int direct_length = un_canonicalize.get(i, '\0', direct);
    CHECK_EQ(direct_length, indirect_length);
  }
  // Check that we arrive at the same results when skipping over
  // canonicalization ranges.
  int next_block = 0;
  while (next_block < CharacterRange::kRangeCanonicalizeMax) {
    uc32 start = CanonRange(next_block);
    CHECK_NE((start & CharacterRange::kStartMarker), 0);
    unsigned dist = start & CharacterRange::kPayloadMask;
    // A zero-length block would keep next_block from ever advancing.
    CHECK(dist > 0);
    unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    int first_length = un_canonicalize.get(next_block, '\0', first);
    for (unsigned i = 1; i < dist; i++) {
      // The i'th character of this block must report offset i.  This
      // previously queried CanonRange(i), which inspects the first block
      // regardless of which block is being walked.
      CHECK_EQ(i, CanonRange(next_block + i));
      unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
      int succ_length = un_canonicalize.get(next_block + i, '\0', succ);
      CHECK_EQ(first_length, succ_length);
      for (int j = 0; j < succ_length; j++) {
        int calc = first[j] + i;
        int found = succ[j];
        CHECK_EQ(calc, found);
      }
    }
    next_block = next_block + dist;
  }
}
// Collects the case equivalents of |input| and checks that they match
// |expected| element by element (same count, same from/to bounds).
static void TestRangeCaseIndependence(CharacterRange input,
                                      Vector<CharacterRange> expected) {
  ZoneScope zone_scope(DELETE_ON_EXIT);
  const int expected_count = expected.length();
  ZoneList<CharacterRange>* equivalents =
      new ZoneList<CharacterRange>(expected_count);
  input.AddCaseEquivalents(equivalents);
  CHECK_EQ(expected_count, equivalents->length());
  for (int n = 0; n < equivalents->length(); n++) {
    CharacterRange& wanted = expected[n];
    CharacterRange& actual = equivalents->at(n);
    CHECK_EQ(wanted.from(), actual.from());
    CHECK_EQ(wanted.to(), actual.to());
  }
}
// Convenience wrapper for the common case where exactly one equivalent
// range is expected.
static void TestSimpleRangeCaseIndependence(CharacterRange input,
                                            CharacterRange expected) {
  EmbeddedVector<CharacterRange, 1> single_expectation;
  single_expectation[0] = expected;
  TestRangeCaseIndependence(input, single_expectation);
}
// Checks which range AddCaseEquivalents produces for a selection of
// ASCII letter ranges, in both case directions.
TEST(CharacterRangeCaseIndependence) {
  const CharacterRange lower_alphabet('a', 'z');
  const CharacterRange upper_alphabet('A', 'Z');

  // Single letters.
  TestSimpleRangeCaseIndependence(CharacterRange::Singleton('a'),
                                  CharacterRange::Singleton('A'));
  TestSimpleRangeCaseIndependence(CharacterRange::Singleton('z'),
                                  CharacterRange::Singleton('Z'));

  // Lower-case input ranges.
  TestSimpleRangeCaseIndependence(lower_alphabet, upper_alphabet);
  TestSimpleRangeCaseIndependence(CharacterRange('c', 'f'),
                                  CharacterRange('C', 'F'));
  TestSimpleRangeCaseIndependence(CharacterRange('a', 'b'),
                                  CharacterRange('A', 'B'));
  TestSimpleRangeCaseIndependence(CharacterRange('y', 'z'),
                                  CharacterRange('Y', 'Z'));
  // One non-letter on either side of the alphabet.
  TestSimpleRangeCaseIndependence(CharacterRange('a' - 1, 'z' + 1),
                                  upper_alphabet);

  // Upper-case input ranges.
  TestSimpleRangeCaseIndependence(upper_alphabet, lower_alphabet);
  TestSimpleRangeCaseIndependence(CharacterRange('C', 'F'),
                                  CharacterRange('c', 'f'));
  TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1),
                                  lower_alphabet);

  // Only [l-z] is missing to make [A-k] case independent, but [a-z] is
  // expected because equivalents are always added a whole block at a
  // time.
  TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'),
                                  lower_alphabet);
}
// Calls Execute with dot output requested for a pattern containing an
// optional capture followed by a back reference to it.
TEST(Graph) {
  V8::Initialize(NULL);
  const char* pattern = "(x)?\\1y";
  const char* subject = "";
  const bool want_dot_output = true;
  Execute(pattern, subject, want_dot_output);
}

View File

@ -26,5 +26,5 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Regression test for bug #743664.
assertEquals("\x60\x60".replace(/\x60/g, "u"), "uu");
assertEquals("\xAB\xAB".replace(/\xAB/g, "u"), "uu");
assertEquals("uu", "\x60\x60".replace(/\x60/g, "u"));
assertEquals("uu", "\xAB\xAB".replace(/\xAB/g, "u"));

View File

@ -89,7 +89,10 @@ assertEquals(result[6], 'F');
// From ecma_3/RegExp/regress-334158.js
assertTrue(/\ca/.test( "\x01" ));
assertFalse(/\ca/.test( "\\ca" ));
assertTrue(/\c[a/]/.test( "\x1ba/]" ));
// Passes in KJS, fails in IrregularExpressions.
// See http://code.google.com/p/v8/issues/detail?id=152
//assertTrue(/\c[a/]/.test( "\x1ba/]" ));
// Test that we handle \s and \S correctly inside some bizarre
// character classes.

View File

@ -0,0 +1,28 @@
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
assertEquals(String.fromCharCode(0x26B), String.fromCharCode(0x2C62).toLowerCase());

View File

@ -9134,6 +9134,32 @@ for (idx in languages) {
assertEquals(munged_sizes[i - 1], munged.length, "munged size " + i);
}
// Returns the lower-case hexadecimal digit for the low four bits of x.
function hex(x) {
  var nibble = x & 15;
  // 48 is the char code of '0'; 97 is the char code of 'a'.
  var base = (nibble < 10) ? 48 : 97 - 10;
  return String.fromCharCode(nibble + base);
}
// Prints the string re with every character outside the printable ASCII
// range (32..126) written as a \xHH or \uHHHH escape.
function dump_re(re) {
  var pieces = [];
  for (var i = 0; i < re.length; i++) {
    var code = re.charCodeAt(i);
    var printable = code >= 32 && code <= 126;
    if (printable) {
      pieces.push(re[i]);
      continue;
    }
    if (code < 256) {
      pieces.push("\\x" + hex(code >> 4) + hex(code));
    } else {
      pieces.push("\\u" + hex(code >> 12) + hex(code >> 8) +
                  hex(code >> 4) + hex(code));
    }
  }
  print ("re = " + pieces.join(""));
}
// Two regexps built from the single character U+0E44: one with the
// character as the whole pattern, one with it inside a character class.
var thai_l_thingy = "\u0e44";
var thai_l_regexp = new RegExp(thai_l_thingy);
var thai_l_regexp2 = new RegExp("[" + thai_l_thingy + "]");

View File

@ -217,7 +217,7 @@ ecma_3/RegExp/regress-57631: FAIL_OK
# depth 500. KJS detects the case and returns null from the match,
# and passes this test (the test doesn't check for a correct return
# value).
ecma_3/RegExp/regress-119909: FAIL_OK
ecma_3/RegExp/regress-119909: PASS || FAIL_OK
# Difference in the way capturing subpatterns work. In JS, when the
@ -236,6 +236,13 @@ ecma_3/RegExp/regress-209919: FAIL_OK
ecma_3/RegExp/regress-330684: FAIL_OK
# This test contains a regexp that runs exponentially long. Spidermonkey
# standalone will hang, though apparently inside Firefox it will trigger a
# long-running-script timeout. JSCRE passes by hitting the matchLimit and
# just pretending that an exhaustive search found no match.
ecma_3/RegExp/regress-307456: PASS || FAIL_OK
# We do not detect overflow in bounds for back references and {}
# quantifiers. Might fix by parsing numbers differently?
js1_5/Regress/regress-230216-2: FAIL_OK
@ -247,11 +254,11 @@ js1_5/Regress/regress-247179: FAIL_OK
# Regexp too long for PCRE.
js1_5/Regress/regress-280769: FAIL_OK
js1_5/Regress/regress-280769-1: FAIL_OK
js1_5/Regress/regress-280769-2: FAIL_OK
js1_5/Regress/regress-280769-4: FAIL_OK
js1_5/Regress/regress-280769-5: FAIL_OK
js1_5/Regress/regress-280769: PASS || FAIL
js1_5/Regress/regress-280769-1: PASS || FAIL
js1_5/Regress/regress-280769-2: PASS || FAIL
js1_5/Regress/regress-280769-4: PASS || FAIL
js1_5/Regress/regress-280769-5: PASS || FAIL
# We do not support static RegExp.multiline - should we?
@ -489,7 +496,7 @@ js1_5/Regress/regress-336100: FAIL_OK
# behavior and not the ECMA spec.
ecma_3/RegExp/15.10.2-1: FAIL_OK
ecma_3/RegExp/perlstress-001: FAIL_OK
ecma_3/RegExp/regress-334158: FAIL_OK
ecma_3/RegExp/regress-334158: PASS || FAIL
# This test requires a failure if we try to compile a function with more