Merge regexp2000 back into bleeding_edge
Review URL: http://codereview.chromium.org/12427 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@832 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
112e9ebbe5
commit
b57b4a15cd
@ -35,15 +35,17 @@ Import('context')
|
||||
|
||||
SOURCES = {
|
||||
'all': [
|
||||
'accessors.cc', 'allocation.cc', 'api.cc', 'assembler.cc', 'ast.cc',
|
||||
'bootstrapper.cc', 'builtins.cc', 'checks.cc', 'code-stubs.cc',
|
||||
'codegen.cc', 'compilation-cache.cc', 'compiler.cc', 'contexts.cc',
|
||||
'conversions.cc', 'counters.cc', 'dateparser.cc', 'debug.cc',
|
||||
'disassembler.cc', 'execution.cc', 'factory.cc', 'flags.cc', 'frames.cc',
|
||||
'global-handles.cc', 'handles.cc', 'hashmap.cc', 'heap.cc', 'ic.cc',
|
||||
'jsregexp.cc', 'log.cc', 'mark-compact.cc', 'messages.cc', 'objects.cc',
|
||||
'parser.cc', 'property.cc', 'rewriter.cc', 'runtime.cc', 'scanner.cc',
|
||||
'scopeinfo.cc', 'scopes.cc', 'serialize.cc', 'snapshot-common.cc',
|
||||
'accessors.cc', 'allocation.cc', 'api.cc', 'assembler.cc',
|
||||
'assembler-irregexp.cc', 'ast.cc', 'bootstrapper.cc', 'builtins.cc',
|
||||
'checks.cc', 'code-stubs.cc', 'codegen.cc', 'compilation-cache.cc',
|
||||
'compiler.cc', 'contexts.cc', 'conversions.cc', 'counters.cc',
|
||||
'dateparser.cc', 'debug.cc', 'disassembler.cc', 'execution.cc',
|
||||
'factory.cc', 'flags.cc', 'frames.cc', 'global-handles.cc',
|
||||
'handles.cc', 'hashmap.cc', 'heap.cc', 'ic.cc', 'interpreter-irregexp.cc',
|
||||
'jsregexp.cc', 'log.cc', 'mark-compact.cc', 'messages.cc',
|
||||
'objects.cc', 'parser.cc', 'property.cc', 'regexp-macro-assembler.cc',
|
||||
'regexp-macro-assembler-irregexp.cc', 'rewriter.cc', 'runtime.cc', 'scanner.cc',
|
||||
'scopeinfo.cc', 'scopes.cc', 'serialize.cc', 'snapshot-common.cc',
|
||||
'spaces.cc', 'string-stream.cc', 'stub-cache.cc', 'token.cc', 'top.cc',
|
||||
'unicode.cc', 'usage-analyzer.cc', 'utils.cc', 'v8-counters.cc',
|
||||
'v8.cc', 'v8threads.cc', 'variables.cc', 'zone.cc'
|
||||
@ -53,7 +55,8 @@ SOURCES = {
|
||||
'macro-assembler-arm.cc', 'stub-cache-arm.cc'],
|
||||
'arch:ia32': ['assembler-ia32.cc', 'builtins-ia32.cc', 'codegen-ia32.cc',
|
||||
'cpu-ia32.cc', 'disasm-ia32.cc', 'frames-ia32.cc', 'ic-ia32.cc',
|
||||
'macro-assembler-ia32.cc', 'stub-cache-ia32.cc'],
|
||||
'macro-assembler-ia32.cc', 'regexp-macro-assembler-ia32.cc',
|
||||
'stub-cache-ia32.cc'],
|
||||
'simulator:arm': ['simulator-arm.cc'],
|
||||
'os:freebsd': ['platform-freebsd.cc'],
|
||||
'os:linux': ['platform-linux.cc'],
|
||||
|
@ -205,6 +205,14 @@ void Assembler::emit(const Immediate& x) {
|
||||
}
|
||||
|
||||
|
||||
void Assembler::emit_w(const Immediate& x) {
|
||||
ASSERT(x.rmode_ == RelocInfo::NONE);
|
||||
uint16_t value = static_cast<uint16_t>(x.x_);
|
||||
reinterpret_cast<uint16_t*>(pc_)[0] = value;
|
||||
pc_ += sizeof(uint16_t);
|
||||
}
|
||||
|
||||
|
||||
// Reads the 32-bit displacement stored at |pc| and returns the address it
// designates, measured from the end of the displacement field.
Address Assembler::target_address_at(Address pc) {
  int32_t displacement = *reinterpret_cast<int32_t*>(pc);
  return pc + sizeof(int32_t) + displacement;
}
|
||||
|
@ -122,7 +122,8 @@ void CpuFeatures::Probe() {
|
||||
#undef __
|
||||
CodeDesc desc;
|
||||
assm.GetCode(&desc);
|
||||
Object* code = Heap::CreateCode(desc, NULL, Code::ComputeFlags(Code::STUB));
|
||||
Object* code =
|
||||
Heap::CreateCode(desc, NULL, Code::ComputeFlags(Code::STUB), NULL);
|
||||
if (!code->IsCode()) return;
|
||||
F0 f = FUNCTION_CAST<F0>(Code::cast(code)->entry());
|
||||
uint32_t res = f();
|
||||
@ -294,7 +295,6 @@ Assembler::Assembler(void* buffer, int buffer_size) {
|
||||
}
|
||||
buffer_size_ = buffer_size;
|
||||
own_buffer_ = true;
|
||||
|
||||
} else {
|
||||
// use externally provided buffer instead
|
||||
ASSERT(buffer_size > 0);
|
||||
@ -420,6 +420,29 @@ void Assembler::push(const Operand& src) {
|
||||
}
|
||||
|
||||
|
||||
// Pushes the position of |label| onto the stack as an immediate.
//
// For a bound label the value pushed is label->pos() + Code::kHeaderSize.
// For an unbound label a 32-bit placeholder is emitted and linked through
// emit_disp() with type CODE_RELATIVE.
//
// Only reloc_mode == RelocInfo::NONE is supported.
void Assembler::push(Label* label, RelocInfo::Mode reloc_mode) {
  ASSERT_NOT_NULL(label);
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
  // If reloc_mode == NONE, the label is stored as buffer relative.
  ASSERT(reloc_mode == RelocInfo::NONE);
  if (label->is_bound()) {
    // Index of position in Code object:
    int pos = label->pos() + Code::kHeaderSize;
    // PUSH imm8 (0x6A) sign-extends its operand to 32 bits, so the short
    // encoding is only correct for values that fit in a non-negative signed
    // byte. Values 128..255 would be pushed as negative numbers and must
    // use the imm32 form instead.
    if (pos >= 0 && pos < 128) {
      EMIT(0x6a);
      EMIT(pos);
    } else {
      EMIT(0x68);
      emit(pos);
    }
  } else {
    EMIT(0x68);
    emit_disp(label, Displacement::CODE_RELATIVE);
  }
}
|
||||
|
||||
|
||||
void Assembler::pop(Register dst) {
|
||||
ASSERT(reloc_info_writer.last_pc() != NULL);
|
||||
if (FLAG_push_pop_elimination && (reloc_info_writer.last_pc() <= last_pc_)) {
|
||||
@ -546,6 +569,22 @@ void Assembler::pop(const Operand& dst) {
|
||||
}
|
||||
|
||||
|
||||
// Emits ENTER with a 16-bit frame size taken from |size| and a nesting
// level of zero: opcode 0xC8, imm16, imm8.
void Assembler::enter(const Immediate& size) {
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
  EMIT(0xC8);    // ENTER
  emit_w(size);  // 16-bit frame size
  EMIT(0);       // nesting level
}
|
||||
|
||||
|
||||
// Emits the single-byte LEAVE instruction (opcode 0xC9).
void Assembler::leave() {
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
  EMIT(0xC9);  // LEAVE
}
|
||||
|
||||
|
||||
void Assembler::mov_b(Register dst, const Operand& src) {
|
||||
EnsureSpace ensure_space(this);
|
||||
last_pc_ = pc_;
|
||||
@ -830,6 +869,23 @@ void Assembler::cmp(const Operand& op, const Immediate& imm) {
|
||||
}
|
||||
|
||||
|
||||
// Emits a repeated byte-wise string compare. The direction flag is cleared
// first so the comparison always runs forward.
void Assembler::rep_cmpsb() {
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
  EMIT(0xFC);  // CLD to ensure forward operation
  EMIT(0xF3);  // REP
  EMIT(0xA6);  // CMPSB
}
|
||||
|
||||
// Emits a repeated 16-bit (word) string compare. The direction flag is
// cleared first so the comparison always runs forward.
//
// In 32-bit code opcode 0xA7 on its own compares doublewords (CMPSD); the
// operand-size override prefix 0x66 is required to compare 16-bit words.
void Assembler::rep_cmpsw() {
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
  EMIT(0xFC);  // CLD to ensure forward operation
  EMIT(0xF3);  // REP
  EMIT(0x66);  // Operand-size override: select 16-bit operands
  EMIT(0xA7);  // CMPSW
}
|
||||
|
||||
|
||||
void Assembler::dec_b(Register dst) {
|
||||
EnsureSpace ensure_space(this);
|
||||
last_pc_ = pc_;
|
||||
@ -1074,6 +1130,14 @@ void Assembler::shr(Register dst) {
|
||||
}
|
||||
|
||||
|
||||
// Emits a logical right shift of |dst| by the count held in CL.
//
// Opcode 0xD3 /5 is "SHR r/m32, CL". The previously emitted 0xD1 /5 is
// "SHR r/m32, 1", which ignores CL and always shifts by one bit.
void Assembler::shr_cl(Register dst) {
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
  EMIT(0xD3);               // shift group, count in CL
  EMIT(0xE8 | dst.code());  // ModRM: mod=11, reg=/5 (SHR), rm=dst
}
|
||||
|
||||
|
||||
void Assembler::sub(const Operand& dst, const Immediate& x) {
|
||||
EnsureSpace ensure_space(this);
|
||||
last_pc_ = pc_;
|
||||
@ -1171,6 +1235,15 @@ void Assembler::xor_(const Operand& dst, const Immediate& x) {
|
||||
}
|
||||
|
||||
|
||||
// Emits BT (bit test) with memory/register operand |dst| and bit index
// register |src|: opcode bytes 0x0F 0xA3 followed by the encoded operand.
void Assembler::bt(const Operand& dst, Register src) {
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
  EMIT(0x0F);  // two-byte opcode escape
  EMIT(0xA3);  // BT
  emit_operand(src, dst);
}
|
||||
|
||||
|
||||
void Assembler::bts(const Operand& dst, Register src) {
|
||||
EnsureSpace ensure_space(this);
|
||||
last_pc_ = pc_;
|
||||
@ -1224,13 +1297,6 @@ void Assembler::ret(int imm16) {
|
||||
}
|
||||
|
||||
|
||||
// Emits the single-byte LEAVE instruction (opcode 0xC9).
void Assembler::leave() {
  EnsureSpace ensure_space(this);
  last_pc_ = pc_;
  EMIT(0xC9);  // LEAVE
}
|
||||
|
||||
|
||||
// Labels refer to positions in the (to be) generated code.
|
||||
// There are bound, linked, and unused labels.
|
||||
//
|
||||
@ -1270,12 +1336,16 @@ void Assembler::bind_to(Label* L, int pos) {
|
||||
while (L->is_linked()) {
|
||||
Displacement disp = disp_at(L);
|
||||
int fixup_pos = L->pos();
|
||||
if (disp.type() == Displacement::UNCONDITIONAL_JUMP) {
|
||||
ASSERT(byte_at(fixup_pos - 1) == 0xE9); // jmp expected
|
||||
if (disp.type() == Displacement::CODE_RELATIVE) {
|
||||
long_at_put(fixup_pos, pos + Code::kHeaderSize);
|
||||
} else {
|
||||
if (disp.type() == Displacement::UNCONDITIONAL_JUMP) {
|
||||
ASSERT(byte_at(fixup_pos - 1) == 0xE9); // jmp expected
|
||||
}
|
||||
// relative address, relative to point after address
|
||||
int imm32 = pos - (fixup_pos + sizeof(int32_t));
|
||||
long_at_put(fixup_pos, imm32);
|
||||
}
|
||||
// relative address, relative to point after address
|
||||
int imm32 = pos - (fixup_pos + sizeof(int32_t));
|
||||
long_at_put(fixup_pos, imm32);
|
||||
disp.next(L);
|
||||
}
|
||||
L->bind_to(pos);
|
||||
|
@ -118,8 +118,8 @@ enum Condition {
|
||||
not_equal = 5,
|
||||
below_equal = 6,
|
||||
above = 7,
|
||||
sign = 8,
|
||||
not_sign = 9,
|
||||
negative = 8,
|
||||
positive = 9,
|
||||
parity_even = 10,
|
||||
parity_odd = 11,
|
||||
less = 12,
|
||||
@ -128,10 +128,12 @@ enum Condition {
|
||||
greater = 15,
|
||||
|
||||
// aliases
|
||||
carry = below,
|
||||
not_carry = above_equal,
|
||||
zero = equal,
|
||||
not_zero = not_equal,
|
||||
negative = sign,
|
||||
positive = not_sign
|
||||
sign = negative,
|
||||
not_sign = positive
|
||||
};
|
||||
|
||||
|
||||
@ -283,13 +285,14 @@ class Operand BASE_EMBEDDED {
|
||||
//
|
||||
// Displacement _data field layout
|
||||
//
|
||||
// |31.....1| ......0|
|
||||
// |31.....2|1......0|
|
||||
// [ next | type |
|
||||
|
||||
class Displacement BASE_EMBEDDED {
|
||||
public:
|
||||
enum Type {
|
||||
UNCONDITIONAL_JUMP,
|
||||
CODE_RELATIVE,
|
||||
OTHER
|
||||
};
|
||||
|
||||
@ -313,8 +316,8 @@ class Displacement BASE_EMBEDDED {
|
||||
private:
|
||||
int data_;
|
||||
|
||||
class TypeField: public BitField<Type, 0, 1> {};
|
||||
class NextField: public BitField<int, 1, 32-1> {};
|
||||
class TypeField: public BitField<Type, 0, 2> {};
|
||||
class NextField: public BitField<int, 2, 32-2> {};
|
||||
|
||||
void init(Label* L, Type type);
|
||||
};
|
||||
@ -440,10 +443,14 @@ class Assembler : public Malloced {
|
||||
void push(const Immediate& x);
|
||||
void push(Register src);
|
||||
void push(const Operand& src);
|
||||
void push(Label* label, RelocInfo::Mode relocation_mode);
|
||||
|
||||
void pop(Register dst);
|
||||
void pop(const Operand& dst);
|
||||
|
||||
void enter(const Immediate& size);
|
||||
void leave();
|
||||
|
||||
// Moves
|
||||
void mov_b(Register dst, const Operand& src);
|
||||
void mov_b(const Operand& dst, int8_t imm8);
|
||||
@ -491,6 +498,9 @@ class Assembler : public Malloced {
|
||||
void cmp(Register reg, const Operand& op);
|
||||
void cmp(const Operand& op, const Immediate& imm);
|
||||
|
||||
void rep_cmpsb();
|
||||
void rep_cmpsw();
|
||||
|
||||
void dec_b(Register dst);
|
||||
|
||||
void dec(Register dst);
|
||||
@ -535,6 +545,7 @@ class Assembler : public Malloced {
|
||||
|
||||
void shr(Register dst, uint8_t imm8);
|
||||
void shr(Register dst);
|
||||
void shr_cl(Register dst);
|
||||
|
||||
void sub(const Operand& dst, const Immediate& x);
|
||||
void sub(Register dst, const Operand& src);
|
||||
@ -550,6 +561,7 @@ class Assembler : public Malloced {
|
||||
void xor_(const Operand& dst, const Immediate& x);
|
||||
|
||||
// Bit operations.
|
||||
void bt(const Operand& dst, Register src);
|
||||
void bts(const Operand& dst, Register src);
|
||||
|
||||
// Miscellaneous
|
||||
@ -558,7 +570,6 @@ class Assembler : public Malloced {
|
||||
void nop();
|
||||
void rdtsc();
|
||||
void ret(int imm16);
|
||||
void leave();
|
||||
|
||||
// Label operations & relative jumps (PPUM Appendix D)
|
||||
//
|
||||
@ -748,6 +759,7 @@ class Assembler : public Malloced {
|
||||
inline void emit(Handle<Object> handle);
|
||||
inline void emit(uint32_t x, RelocInfo::Mode rmode);
|
||||
inline void emit(const Immediate& x);
|
||||
inline void emit_w(const Immediate& x);
|
||||
|
||||
// instruction generation
|
||||
void emit_arith_b(int op1, int op2, Register dst, int imm8);
|
||||
|
82
src/assembler-irregexp-inl.h
Normal file
82
src/assembler-irregexp-inl.h
Normal file
@ -0,0 +1,82 @@
|
||||
// Copyright 2008 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// A light-weight assembler for the Irregexp byte code.
|
||||
|
||||
|
||||
#include "v8.h"
|
||||
#include "ast.h"
|
||||
#include "bytecodes-irregexp.h"
|
||||
#include "assembler-irregexp.h"
|
||||
|
||||
|
||||
namespace v8 { namespace internal {
|
||||
|
||||
|
||||
void IrregexpAssembler::Emit(uint32_t byte) {
|
||||
ASSERT(pc_ <= buffer_.length());
|
||||
if (pc_ == buffer_.length()) {
|
||||
Expand();
|
||||
}
|
||||
buffer_[pc_++] = byte;
|
||||
}
|
||||
|
||||
|
||||
void IrregexpAssembler::Emit16(uint32_t word) {
|
||||
ASSERT(pc_ <= buffer_.length());
|
||||
if (pc_ + 1 >= buffer_.length()) {
|
||||
Expand();
|
||||
}
|
||||
Store16(buffer_.start() + pc_, word);
|
||||
pc_ += 2;
|
||||
}
|
||||
|
||||
|
||||
void IrregexpAssembler::Emit32(uint32_t word) {
|
||||
ASSERT(pc_ <= buffer_.length());
|
||||
if (pc_ + 3 >= buffer_.length()) {
|
||||
Expand();
|
||||
}
|
||||
Store32(buffer_.start() + pc_, word);
|
||||
pc_ += 4;
|
||||
}
|
||||
|
||||
|
||||
// Emits a 32-bit reference to |l|.
//
// A bound label is emitted as its position. An unbound label is threaded
// into a linked list kept in the byte code itself: the slot receives the
// label's previous link position (0 when there was none) and the label is
// re-linked to this slot.
void IrregexpAssembler::EmitOrLink(Label* l) {
  if (l->is_bound()) {
    Emit32(l->pos());
    return;
  }
  int previous_link = l->is_linked() ? l->pos() : 0;
  l->link_to(pc_);
  Emit32(previous_link);
}
|
||||
|
||||
} } // namespace v8::internal
|
339
src/assembler-irregexp.cc
Normal file
339
src/assembler-irregexp.cc
Normal file
@ -0,0 +1,339 @@
|
||||
// Copyright 2008 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// A light-weight assembler for the Irregexp byte code.
|
||||
|
||||
|
||||
#include "v8.h"
|
||||
#include "ast.h"
|
||||
#include "bytecodes-irregexp.h"
|
||||
#include "assembler-irregexp.h"
|
||||
|
||||
#include "assembler-irregexp-inl.h"
|
||||
|
||||
|
||||
namespace v8 { namespace internal {
|
||||
|
||||
|
||||
// Starts emitting at offset 0 of the caller-supplied |buffer|. The buffer
// is not owned by the assembler at this point.
IrregexpAssembler::IrregexpAssembler(Vector<byte> buffer)
    : buffer_(buffer), pc_(0), own_buffer_(false) {}
|
||||
|
||||
|
||||
// Releases the buffer only when the assembler owns it.
IrregexpAssembler::~IrregexpAssembler() {
  if (!own_buffer_) return;
  buffer_.Dispose();
}
|
||||
|
||||
|
||||
// Emits BC_PUSH_CP followed by |cp_offset| as a 32-bit operand.
void IrregexpAssembler::PushCurrentPosition(int cp_offset) {
  ASSERT(cp_offset >= 0);
  Emit(BC_PUSH_CP);
  Emit32(cp_offset);
}
|
||||
|
||||
|
||||
// Emits BC_PUSH_BT followed by a 32-bit reference to label |l|.
void IrregexpAssembler::PushBacktrack(Label* l) {
  Emit(BC_PUSH_BT);
  EmitOrLink(l);
}
|
||||
|
||||
|
||||
// Emits BC_PUSH_REGISTER followed by the register |index| as one byte.
void IrregexpAssembler::PushRegister(int index) {
  ASSERT(index >= 0);
  Emit(BC_PUSH_REGISTER);
  Emit(index);
}
|
||||
|
||||
|
||||
// Emits BC_SET_REGISTER_TO_CP, the register |index| as one byte, and
// |cp_offset| as a 32-bit operand.
void IrregexpAssembler::WriteCurrentPositionToRegister(int index,
                                                       int cp_offset) {
  ASSERT(cp_offset >= 0);
  ASSERT(index >= 0);
  Emit(BC_SET_REGISTER_TO_CP);
  Emit(index);
  Emit32(cp_offset);
}
|
||||
|
||||
|
||||
// Emits BC_SET_CP_TO_REGISTER followed by the register |index| as one byte.
void IrregexpAssembler::ReadCurrentPositionFromRegister(int index) {
  ASSERT(index >= 0);
  Emit(BC_SET_CP_TO_REGISTER);
  Emit(index);
}
|
||||
|
||||
|
||||
// Emits BC_SET_REGISTER_TO_SP followed by the register |index| as one byte.
void IrregexpAssembler::WriteStackPointerToRegister(int index) {
  ASSERT(index >= 0);
  Emit(BC_SET_REGISTER_TO_SP);
  Emit(index);
}
|
||||
|
||||
|
||||
// Emits BC_SET_SP_TO_REGISTER followed by the register |index| as one byte.
void IrregexpAssembler::ReadStackPointerFromRegister(int index) {
  ASSERT(index >= 0);
  Emit(BC_SET_SP_TO_REGISTER);
  Emit(index);
}
|
||||
|
||||
|
||||
// Emits BC_SET_REGISTER, the register |index| as one byte, and |value| as
// a 32-bit operand.
void IrregexpAssembler::SetRegister(int index, int value) {
  ASSERT(index >= 0);
  Emit(BC_SET_REGISTER);
  Emit(index);
  Emit32(value);
}
|
||||
|
||||
|
||||
// Emits BC_ADVANCE_REGISTER, the register |index| as one byte, and the
// increment |by| as a 32-bit operand.
void IrregexpAssembler::AdvanceRegister(int index, int by) {
  ASSERT(index >= 0);
  Emit(BC_ADVANCE_REGISTER);
  Emit(index);
  Emit32(by);
}
|
||||
|
||||
|
||||
// Emits the operand-less BC_POP_CP byte code.
void IrregexpAssembler::PopCurrentPosition() {
  Emit(BC_POP_CP);
}
|
||||
|
||||
|
||||
// Emits the operand-less BC_POP_BT byte code.
void IrregexpAssembler::PopBacktrack() {
  Emit(BC_POP_BT);
}
|
||||
|
||||
|
||||
void IrregexpAssembler::PopRegister(int index) {
|
||||
Emit(BC_POP_REGISTER);
|
||||
Emit(index);
|
||||
}
|
||||
|
||||
|
||||
// Emits the operand-less BC_FAIL byte code.
void IrregexpAssembler::Fail() {
  Emit(BC_FAIL);
}
|
||||
|
||||
|
||||
// Emits the operand-less BC_BREAK byte code.
void IrregexpAssembler::Break() {
  Emit(BC_BREAK);
}
|
||||
|
||||
|
||||
// Emits the operand-less BC_SUCCEED byte code.
void IrregexpAssembler::Succeed() {
  Emit(BC_SUCCEED);
}
|
||||
|
||||
|
||||
// Binds the unbound label |l| to the current position.
//
// If the label is linked, the chain of pending references stored in the
// byte code is walked: each slot holds the position of the next pending
// slot (0 terminates the chain) and is overwritten with the current pc_.
void IrregexpAssembler::Bind(Label* l) {
  ASSERT(!l->is_bound());
  if (l->is_linked()) {
    for (int link = l->pos(); link != 0;) {
      int slot = link;
      // Read the next link before the slot is patched.
      link = Load32(buffer_.start() + slot);
      Store32(buffer_.start() + slot, pc_);
    }
  }
  l->bind_to(pc_);
}
|
||||
|
||||
|
||||
// Emits BC_ADVANCE_CP followed by |cp_offset| as a 32-bit operand.
void IrregexpAssembler::AdvanceCP(int cp_offset) {
  Emit(BC_ADVANCE_CP);
  Emit32(cp_offset);
}
|
||||
|
||||
|
||||
// Emits BC_GOTO followed by a 32-bit reference to label |l|.
void IrregexpAssembler::GoTo(Label* l) {
  Emit(BC_GOTO);
  EmitOrLink(l);
}
|
||||
|
||||
|
||||
// Emits BC_LOAD_CURRENT_CHAR, |cp_offset| as a 32-bit operand, and a
// 32-bit reference to the |on_end| label.
void IrregexpAssembler::LoadCurrentChar(int cp_offset, Label* on_end) {
  Emit(BC_LOAD_CURRENT_CHAR);
  Emit32(cp_offset);
  EmitOrLink(on_end);
}
|
||||
|
||||
|
||||
// Emits BC_CHECK_CHAR, the character |c| as a 16-bit operand, and a
// 32-bit reference to the |on_match| label.
void IrregexpAssembler::CheckCharacter(uc16 c, Label* on_match) {
  Emit(BC_CHECK_CHAR);
  Emit16(c);
  EmitOrLink(on_match);
}
|
||||
|
||||
|
||||
// Emits BC_CHECK_NOT_CHAR, the character |c| as a 16-bit operand, and a
// 32-bit reference to the |on_mismatch| label.
void IrregexpAssembler::CheckNotCharacter(uc16 c, Label* on_mismatch) {
  Emit(BC_CHECK_NOT_CHAR);
  Emit16(c);
  EmitOrLink(on_mismatch);
}
|
||||
|
||||
// Emits BC_OR_CHECK_NOT_CHAR, |c| and |mask| as 16-bit operands (in that
// order), and a 32-bit reference to the |on_mismatch| label.
void IrregexpAssembler::OrThenCheckNotCharacter(uc16 c,
                                                uc16 mask,
                                                Label* on_mismatch) {
  Emit(BC_OR_CHECK_NOT_CHAR);
  Emit16(c);
  Emit16(mask);
  EmitOrLink(on_mismatch);
}
|
||||
|
||||
|
||||
// Emits BC_MINUS_OR_CHECK_NOT_CHAR, |c| and |mask| as 16-bit operands (in
// that order), and a 32-bit reference to the |on_mismatch| label.
void IrregexpAssembler::MinusOrThenCheckNotCharacter(uc16 c,
                                                     uc16 mask,
                                                     Label* on_mismatch) {
  Emit(BC_MINUS_OR_CHECK_NOT_CHAR);
  Emit16(c);
  Emit16(mask);
  EmitOrLink(on_mismatch);
}
|
||||
|
||||
|
||||
// Emits BC_CHECK_LT, |limit| as a 16-bit operand, and a 32-bit reference
// to the |on_less| label.
void IrregexpAssembler::CheckCharacterLT(uc16 limit, Label* on_less) {
  Emit(BC_CHECK_LT);
  Emit16(limit);
  EmitOrLink(on_less);
}
|
||||
|
||||
|
||||
// Emits BC_CHECK_GT, |limit| as a 16-bit operand, and a 32-bit reference
// to the |on_greater| label.
void IrregexpAssembler::CheckCharacterGT(uc16 limit, Label* on_greater) {
  Emit(BC_CHECK_GT);
  Emit16(limit);
  EmitOrLink(on_greater);
}
|
||||
|
||||
|
||||
void IrregexpAssembler::CheckNotBackReference(int capture_index,
|
||||
Label* on_mismatch) {
|
||||
Emit(BC_CHECK_NOT_BACK_REF);
|
||||
Emit(capture_index);
|
||||
EmitOrLink(on_mismatch);
|
||||
}
|
||||
|
||||
|
||||
void IrregexpAssembler::CheckRegister(int byte_code,
|
||||
int reg_index,
|
||||
uint16_t vs,
|
||||
Label* on_true) {
|
||||
Emit(byte_code);
|
||||
Emit(reg_index);
|
||||
Emit16(vs);
|
||||
EmitOrLink(on_true);
|
||||
}
|
||||
|
||||
|
||||
void IrregexpAssembler::CheckRegisterLT(int reg_index,
|
||||
uint16_t vs,
|
||||
Label* on_less_than) {
|
||||
CheckRegister(BC_CHECK_REGISTER_LT, reg_index, vs, on_less_than);
|
||||
}
|
||||
|
||||
|
||||
void IrregexpAssembler::CheckRegisterGE(int reg_index,
|
||||
uint16_t vs,
|
||||
Label* on_greater_than_equal) {
|
||||
CheckRegister(BC_CHECK_REGISTER_GE, reg_index, vs, on_greater_than_equal);
|
||||
}
|
||||
|
||||
|
||||
// Emits BC_LOOKUP_MAP1, |start| as a 16-bit operand, then 32-bit
// references to the |bit_map| and |on_zero| labels, in that order.
void IrregexpAssembler::LookupMap1(uc16 start, Label* bit_map, Label* on_zero) {
  Emit(BC_LOOKUP_MAP1);
  Emit16(start);
  EmitOrLink(bit_map);
  EmitOrLink(on_zero);
}
|
||||
|
||||
|
||||
// Emits BC_LOOKUP_MAP2, |start| as a 16-bit operand, a 32-bit reference to
// |half_nibble_map|, and then one 32-bit label reference per entry of
// |table| (1 to 4 entries).
void IrregexpAssembler::LookupMap2(uc16 start,
                                   Label* half_nibble_map,
                                   const Vector<Label*>& table) {
  Emit(BC_LOOKUP_MAP2);
  Emit16(start);
  EmitOrLink(half_nibble_map);
  ASSERT(table.length() > 0);
  ASSERT(table.length() <= 4);
  int entry = 0;
  while (entry < table.length()) {
    EmitOrLink(table[entry]);
    entry++;
  }
}
|
||||
|
||||
|
||||
// Emits BC_LOOKUP_MAP8, |start| as a 16-bit operand, a 32-bit reference to
// |byte_map|, and then one 32-bit label reference per entry of |table|
// (1 to 256 entries).
void IrregexpAssembler::LookupMap8(uc16 start,
                                   Label* byte_map,
                                   const Vector<Label*>& table) {
  Emit(BC_LOOKUP_MAP8);
  Emit16(start);
  EmitOrLink(byte_map);
  ASSERT(table.length() > 0);
  ASSERT(table.length() <= 256);
  int entry = 0;
  while (entry < table.length()) {
    EmitOrLink(table[entry]);
    entry++;
  }
}
|
||||
|
||||
|
||||
// Emits BC_LOOKUP_HI_MAP8, |start| as one byte, a 32-bit reference to
// |byte_map|, and then one 32-bit label reference per entry of |table|
// (1 to 256 entries).
void IrregexpAssembler::LookupHighMap8(byte start,
                                       Label* byte_map,
                                       const Vector<Label*>& table) {
  Emit(BC_LOOKUP_HI_MAP8);
  Emit(start);
  EmitOrLink(byte_map);
  ASSERT(table.length() > 0);
  ASSERT(table.length() <= 256);
  int entry = 0;
  while (entry < table.length()) {
    EmitOrLink(table[entry]);
    entry++;
  }
}
|
||||
|
||||
|
||||
int IrregexpAssembler::length() {
|
||||
return pc_;
|
||||
}
|
||||
|
||||
|
||||
// Copies the bytes emitted so far to the memory starting at |a|. The
// destination must have room for at least length() bytes.
void IrregexpAssembler::Copy(Address a) {
  const int emitted = length();
  memcpy(a, buffer_.start(), emitted);
}
|
||||
|
||||
|
||||
// Replaces the buffer with a larger one owned by the assembler, copying the
// existing contents across. The previous buffer is disposed of only if the
// assembler owned it.
//
// The new buffer is twice the old size, but always at least four bytes
// larger than the old one. Four bytes is the widest single emission
// (Emit32), and the emitters call Expand() only once before writing.
// Plain doubling would leave a zero-length buffer at zero length and would
// be too small for Emit32 on buffers shorter than four bytes.
void IrregexpAssembler::Expand() {
  static const int kMinimumGrowth = 4;
  bool old_buffer_was_our_own = own_buffer_;
  Vector<byte> old_buffer = buffer_;
  int new_length = old_buffer.length() * 2;
  if (new_length < old_buffer.length() + kMinimumGrowth) {
    new_length = old_buffer.length() + kMinimumGrowth;
  }
  buffer_ = Vector<byte>::New(new_length);
  own_buffer_ = true;
  // Nothing to copy from an empty buffer, whose start pointer may be null.
  if (old_buffer.length() > 0) {
    memcpy(buffer_.start(), old_buffer.start(), old_buffer.length());
  }
  if (old_buffer_was_our_own) {
    old_buffer.Dispose();
  }
}
|
||||
|
||||
|
||||
} } // namespace v8::internal
|
137
src/assembler-irregexp.h
Normal file
137
src/assembler-irregexp.h
Normal file
@ -0,0 +1,137 @@
|
||||
// Copyright 2006-2008 the V8 project authors. All rights reserved.
|
||||
|
||||
// A light-weight assembler for the Irregexp byte code.
|
||||
|
||||
#ifndef V8_ASSEMBLER_IRREGEXP_H_
|
||||
#define V8_ASSEMBLER_IRREGEXP_H_
|
||||
|
||||
namespace v8 { namespace internal {
|
||||
|
||||
|
||||
class IrregexpAssembler {
|
||||
public:
|
||||
// Create an assembler. Instructions and relocation information are emitted
|
||||
// into a buffer, with the instructions starting from the beginning and the
|
||||
// relocation information starting from the end of the buffer. See CodeDesc
|
||||
// for a detailed comment on the layout (globals.h).
|
||||
//
|
||||
// If the provided buffer is NULL, the assembler allocates and grows its own
|
||||
// buffer, and buffer_size determines the initial buffer size. The buffer is
|
||||
// owned by the assembler and deallocated upon destruction of the assembler.
|
||||
//
|
||||
// If the provided buffer is not NULL, the assembler uses the provided buffer
|
||||
// for code generation and assumes its size to be buffer_size. If the buffer
|
||||
// is too small, a fatal error occurs. No deallocation of the buffer is done
|
||||
// upon destruction of the assembler.
|
||||
explicit IrregexpAssembler(Vector<byte>);
|
||||
~IrregexpAssembler();
|
||||
|
||||
// CP = current position in source.
|
||||
// BT = backtrack label.
|
||||
|
||||
// Stack.
|
||||
void PushCurrentPosition(int cp_offset = 0);
|
||||
void PushBacktrack(Label* l);
|
||||
void PushRegister(int index);
|
||||
void WriteCurrentPositionToRegister(int index, int cp_offset = 0);
|
||||
void ReadCurrentPositionFromRegister(int index);
|
||||
void WriteStackPointerToRegister(int index);
|
||||
void ReadStackPointerFromRegister(int index);
|
||||
void SetRegister(int index, int value);
|
||||
void AdvanceRegister(int index, int by);
|
||||
|
||||
void PopCurrentPosition();
|
||||
void PopBacktrack();
|
||||
void PopRegister(int index);
|
||||
|
||||
void Fail();
|
||||
void Succeed();
|
||||
|
||||
void Break(); // This instruction will cause a fatal VM error if hit.
|
||||
|
||||
void Bind(Label* l); // Binds an unbound label L to the current code posn.
|
||||
|
||||
void AdvanceCP(int by);
|
||||
|
||||
void GoTo(Label* l);
|
||||
|
||||
// Loads current char into a machine register. Jumps to the label if we
|
||||
// reached the end of the subject string. Fall through otherwise.
|
||||
void LoadCurrentChar(int cp_offset, Label* on_end);
|
||||
|
||||
// Checks current char register against a singleton.
|
||||
void CheckCharacter(uc16 c, Label* on_match);
|
||||
void CheckNotCharacter(uc16 c, Label* on_mismatch);
|
||||
void OrThenCheckNotCharacter(uc16 c, uc16 mask, Label* on_mismatch);
|
||||
void MinusOrThenCheckNotCharacter(uc16 c, uc16 mask, Label* on_mismatch);
|
||||
|
||||
// Used to check current char register against a range.
|
||||
void CheckCharacterLT(uc16 limit, Label* on_less);
|
||||
void CheckCharacterGT(uc16 limit, Label* on_greater);
|
||||
|
||||
// Checks current position for a match against a
|
||||
// previous capture. Advances current position by the length of the capture
|
||||
// iff it matches. The capture is stored in a given register and the
|
||||
// the register after. If a register contains -1 then the other register
|
||||
// must always contain -1 and the on_mismatch label will never be called.
|
||||
void CheckNotBackReference(int capture_index, Label* on_mismatch);
|
||||
|
||||
// Checks a register for strictly-less-than or greater-than-or-equal.
|
||||
void CheckRegisterLT(int reg_index, uint16_t vs, Label* on_less_than);
|
||||
void CheckRegisterGE(int reg_index, uint16_t vs, Label* on_greater_equal);
|
||||
|
||||
// Subtracts a 16 bit value from the current character, uses the result to
|
||||
// look up in a bit array, uses the result of that decide whether to fall
|
||||
// though (on 1) or jump to the on_zero label (on 0).
|
||||
void LookupMap1(uc16 start, Label* bit_map, Label* on_zero);
|
||||
|
||||
// Subtracts a 16 bit value from the current character, uses the result to
|
||||
// look up in a 2-bit array, uses the result of that to look up in a label
|
||||
// table and jumps to the label.
|
||||
void LookupMap2(uc16 start,
|
||||
Label* half_nibble_map,
|
||||
const Vector<Label*>& table);
|
||||
|
||||
// Subtracts a 16 bit value from the current character, uses the result to
|
||||
// look up in a byte array, uses the result of that to look up in a label
|
||||
// array and jumps to the label.
|
||||
void LookupMap8(uc16 start, Label* byte_map, const Vector<Label*>& table);
|
||||
|
||||
// Takes the high byte of the current character, uses the result to
|
||||
// look up in a byte array, uses the result of that to look up in a label
|
||||
// array and jumps to the label.
|
||||
void LookupHighMap8(byte start, Label* byte_map, const Vector<Label*>& table);
|
||||
|
||||
// Code and bitmap emission.
|
||||
inline void Emit32(uint32_t x);
|
||||
inline void Emit16(uint32_t x);
|
||||
inline void Emit(uint32_t x);
|
||||
|
||||
// Bytecode buffer.
|
||||
int length();
|
||||
void Copy(Address a);
|
||||
|
||||
inline void EmitOrLink(Label* l);
|
||||
private:
|
||||
// Don't use this.
|
||||
IrregexpAssembler() { UNREACHABLE(); }
|
||||
// The buffer into which code and relocation info are generated.
|
||||
Vector<byte> buffer_;
|
||||
|
||||
inline void CheckRegister(int byte_code,
|
||||
int reg_index,
|
||||
uint16_t vs,
|
||||
Label* on_true);
|
||||
// Code generation.
|
||||
int pc_; // The program counter; moves forward.
|
||||
|
||||
// True if the assembler owns the buffer, false if buffer is external.
|
||||
bool own_buffer_;
|
||||
|
||||
void Expand();
|
||||
};
|
||||
|
||||
|
||||
} } // namespace v8::internal
|
||||
|
||||
#endif // V8_ASSEMBLER_IRREGEXP_H_
|
@ -50,7 +50,8 @@ namespace v8 { namespace internal {
|
||||
|
||||
class Label : public ZoneObject { // LabelShadows are dynamically allocated.
|
||||
public:
|
||||
INLINE(Label()) { Unuse(); }
|
||||
INLINE(Label())
|
||||
{ Unuse(); }
|
||||
INLINE(~Label()) { ASSERT(!is_linked()); }
|
||||
|
||||
INLINE(void Unuse()) { pos_ = 0; }
|
||||
@ -82,8 +83,10 @@ class Label : public ZoneObject { // LabelShadows are dynamically allocated.
|
||||
}
|
||||
|
||||
friend class Assembler;
|
||||
friend class RegexpAssembler;
|
||||
friend class Displacement;
|
||||
friend class LabelShadow;
|
||||
friend class IrregexpAssembler;
|
||||
};
|
||||
|
||||
|
||||
|
201
src/ast.cc
201
src/ast.cc
@ -29,6 +29,7 @@
|
||||
|
||||
#include "ast.h"
|
||||
#include "scopes.h"
|
||||
#include "string-stream.h"
|
||||
|
||||
namespace v8 { namespace internal {
|
||||
|
||||
@ -179,4 +180,204 @@ void Visitor::VisitExpressions(ZoneList<Expression*>* expressions) {
|
||||
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Regular expressions
|
||||
|
||||
#define MAKE_ACCEPT(Name) \
|
||||
void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
|
||||
return visitor->Visit##Name(this, data); \
|
||||
}
|
||||
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
|
||||
#undef MAKE_ACCEPT
|
||||
|
||||
#define MAKE_TYPE_CASE(Name) \
|
||||
RegExp##Name* RegExpTree::As##Name() { \
|
||||
return NULL; \
|
||||
} \
|
||||
bool RegExpTree::Is##Name() { return false; }
|
||||
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
|
||||
#undef MAKE_TYPE_CASE
|
||||
|
||||
#define MAKE_TYPE_CASE(Name) \
|
||||
RegExp##Name* RegExp##Name::As##Name() { \
|
||||
return this; \
|
||||
} \
|
||||
bool RegExp##Name::Is##Name() { return true; }
|
||||
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
|
||||
#undef MAKE_TYPE_CASE
|
||||
|
||||
RegExpEmpty RegExpEmpty::kInstance;
|
||||
|
||||
|
||||
// Convert regular expression trees to a simple sexp representation.
|
||||
// This representation should be different from the input grammar
|
||||
// in as many cases as possible, to make it more difficult for incorrect
|
||||
// parses to look as correct ones which is likely if the input and
|
||||
// output formats are alike.
|
||||
class RegExpUnparser: public RegExpVisitor {
|
||||
public:
|
||||
RegExpUnparser();
|
||||
void VisitCharacterRange(CharacterRange that);
|
||||
SmartPointer<const char> ToString() { return stream_.ToCString(); }
|
||||
#define MAKE_CASE(Name) virtual void* Visit##Name(RegExp##Name*, void* data);
|
||||
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
|
||||
#undef MAKE_CASE
|
||||
private:
|
||||
StringStream* stream() { return &stream_; }
|
||||
HeapStringAllocator alloc_;
|
||||
StringStream stream_;
|
||||
};
|
||||
|
||||
|
||||
// Binds the output stream to this unparser's own string allocator.
RegExpUnparser::RegExpUnparser()
    : stream_(&alloc_) { }
|
||||
|
||||
|
||||
// Prints a disjunction as "(|" followed by each alternative, every one
// preceded by a single space, and a closing ")".  Always returns NULL.
void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
  ZoneList<RegExpTree*>* branches = that->alternatives();
  stream()->Add("(|");
  for (int index = 0; index < branches->length(); index++) {
    stream()->Add(" ");
    branches->at(index)->Accept(this, data);
  }
  stream()->Add(")");
  return NULL;
}
|
||||
|
||||
|
||||
// Prints an alternative as "(:" followed by each child node, every one
// preceded by a single space, and a closing ")".  Always returns NULL.
void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) {
  ZoneList<RegExpTree*>* children = that->nodes();
  stream()->Add("(:");
  for (int index = 0; index < children->length(); index++) {
    stream()->Add(" ");
    children->at(index)->Accept(this, data);
  }
  stream()->Add(")");
  return NULL;
}
|
||||
|
||||
|
||||
// Prints the lower bound of the range and, unless the range is a singleton,
// a "-" followed by the upper bound.
void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
  stream()->Add("%k", that.from());
  if (that.IsSingleton()) return;
  stream()->Add("-%k", that.to());
}
|
||||
|
||||
|
||||
|
||||
// Prints a character class as its space-separated ranges inside "[...]".
// A negated class gets a "^" in front of the opening bracket.
// Always returns NULL.
void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that,
                                          void* data) {
  if (that->is_negated()) {
    stream()->Add("^");
  }
  stream()->Add("[");
  ZoneList<CharacterRange>* class_ranges = that->ranges();
  for (int index = 0; index < class_ranges->length(); index++) {
    // Separator goes between ranges only, not before the first one.
    if (index != 0) stream()->Add(" ");
    VisitCharacterRange(class_ranges->at(index));
  }
  stream()->Add("]");
  return NULL;
}
|
||||
|
||||
|
||||
// Prints the short marker for an assertion: "@^i" / "@$i" for start / end
// of input, "@^l" / "@$l" for start / end of line, "@b" for a boundary and
// "@B" for a non-boundary.  Always returns NULL.
void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
  const char* marker = NULL;
  switch (that->type()) {
    case RegExpAssertion::START_OF_INPUT: marker = "@^i"; break;
    case RegExpAssertion::END_OF_INPUT:   marker = "@$i"; break;
    case RegExpAssertion::START_OF_LINE:  marker = "@^l"; break;
    case RegExpAssertion::END_OF_LINE:    marker = "@$l"; break;
    case RegExpAssertion::BOUNDARY:       marker = "@b";  break;
    case RegExpAssertion::NON_BOUNDARY:   marker = "@B";  break;
  }
  // Nothing is written for a value outside the enumerators above.
  if (marker != NULL) stream()->Add(marker);
  return NULL;
}
|
||||
|
||||
|
||||
// Prints an atom as its characters enclosed in single quotes.
// Always returns NULL.
void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
  Vector<const uc16> characters = that->data();
  stream()->Add("'");
  for (int pos = 0; pos < characters.length(); pos++) {
    stream()->Add("%k", characters[pos]);
  }
  stream()->Add("'");
  return NULL;
}
|
||||
|
||||
|
||||
// Prints a text node.  A node with exactly one element prints just that
// element; any other count prints "(!" followed by each element, every one
// preceded by a single space, and a closing ")".  Always returns NULL.
//
// NOTE(review): every element is reached through data.u_atom, whatever kind
// of element it is.  TextElement is declared elsewhere - confirm this is
// valid for elements that are not atoms.
void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
  ZoneList<TextElement>* parts = that->elements();
  if (parts->length() == 1) {
    parts->at(0).data.u_atom->Accept(this, data);
    return NULL;
  }
  stream()->Add("(!");
  for (int index = 0; index < parts->length(); index++) {
    stream()->Add(" ");
    parts->at(index).data.u_atom->Accept(this, data);
  }
  stream()->Add(")");
  return NULL;
}
|
||||
|
||||
|
||||
// Prints a quantifier as "(# <min> <max> <g|n> <body>)".  An upper bound of
// RegExpQuantifier::kInfinity is shown as "-"; "g" marks a greedy quantifier
// and "n" a non-greedy one.  Always returns NULL.
void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
  stream()->Add("(# %i ", that->min());
  const int upper_bound = that->max();
  if (upper_bound != RegExpQuantifier::kInfinity) {
    stream()->Add("%i ", upper_bound);
  } else {
    stream()->Add("- ");
  }
  if (that->is_greedy()) {
    stream()->Add("g ");
  } else {
    stream()->Add("n ");
  }
  that->body()->Accept(this, data);
  stream()->Add(")");
  return NULL;
}
|
||||
|
||||
|
||||
// Prints a capture as "(^ <body>)".  Always returns NULL.
void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
  RegExpTree* captured = that->body();
  stream()->Add("(^ ");
  captured->Accept(this, data);
  stream()->Add(")");
  return NULL;
}
|
||||
|
||||
|
||||
// Prints a lookahead as "(-> + <body>)" when it is positive and
// "(-> - <body>)" when it is negative.  Always returns NULL.
void* RegExpUnparser::VisitLookahead(RegExpLookahead* that, void* data) {
  stream()->Add("(-> ");
  if (that->is_positive()) {
    stream()->Add("+ ");
  } else {
    stream()->Add("- ");
  }
  that->body()->Accept(this, data);
  stream()->Add(")");
  return NULL;
}
|
||||
|
||||
|
||||
// Prints a back reference as "(<- <index>)", where <index> is the index of
// the referenced capture.  Always returns NULL.
void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
                                         void* data) {
  const int capture_index = that->index();
  stream()->Add("(<- %i)", capture_index);
  return NULL;
}
|
||||
|
||||
|
||||
// Prints the empty expression as a single '%' character.
// Always returns NULL.
void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) {
  StringStream* out = stream();
  out->Put('%');
  return NULL;
}
|
||||
|
||||
|
||||
// Renders this tree by running a fresh RegExpUnparser over it (with NULL as
// the visitor data) and returning the text the unparser collected.
SmartPointer<const char> RegExpTree::ToString() {
  RegExpUnparser printer;
  this->Accept(&printer, NULL);
  return printer.ToString();
}
|
||||
|
||||
|
||||
} } // namespace v8::internal
|
||||
|
263
src/ast.h
263
src/ast.h
@ -34,6 +34,7 @@
|
||||
#include "token.h"
|
||||
#include "variables.h"
|
||||
#include "macro-assembler.h"
|
||||
#include "jsregexp.h"
|
||||
|
||||
namespace v8 { namespace internal {
|
||||
|
||||
@ -1191,6 +1192,268 @@ class ThisFunction: public Expression {
|
||||
};
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Regular expressions
|
||||
|
||||
|
||||
class RegExpTree: public ZoneObject {
|
||||
public:
|
||||
virtual ~RegExpTree() { }
|
||||
virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
|
||||
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success,
|
||||
RegExpNode* on_failure) = 0;
|
||||
virtual bool IsTextElement() { return false; }
|
||||
virtual void AppendToText(RegExpText* text);
|
||||
SmartPointer<const char> ToString();
|
||||
#define MAKE_ASTYPE(Name) \
|
||||
virtual RegExp##Name* As##Name(); \
|
||||
virtual bool Is##Name();
|
||||
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
|
||||
#undef MAKE_ASTYPE
|
||||
};
|
||||
|
||||
|
||||
class RegExpDisjunction: public RegExpTree {
|
||||
public:
|
||||
explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
|
||||
: alternatives_(alternatives) { }
|
||||
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
||||
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success,
|
||||
RegExpNode* on_failure);
|
||||
virtual RegExpDisjunction* AsDisjunction();
|
||||
virtual bool IsDisjunction();
|
||||
ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
|
||||
private:
|
||||
ZoneList<RegExpTree*>* alternatives_;
|
||||
};
|
||||
|
||||
|
||||
class RegExpAlternative: public RegExpTree {
|
||||
public:
|
||||
explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes) : nodes_(nodes) { }
|
||||
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
||||
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success,
|
||||
RegExpNode* on_failure);
|
||||
virtual RegExpAlternative* AsAlternative();
|
||||
virtual bool IsAlternative();
|
||||
ZoneList<RegExpTree*>* nodes() { return nodes_; }
|
||||
private:
|
||||
ZoneList<RegExpTree*>* nodes_;
|
||||
};
|
||||
|
||||
|
||||
class RegExpText: public RegExpTree {
|
||||
public:
|
||||
RegExpText() : elements_(2) { }
|
||||
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
||||
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success,
|
||||
RegExpNode* on_failure);
|
||||
virtual RegExpText* AsText();
|
||||
virtual bool IsText();
|
||||
virtual bool IsTextElement() { return true; }
|
||||
virtual void AppendToText(RegExpText* text);
|
||||
void AddElement(TextElement elm) { elements_.Add(elm); }
|
||||
ZoneList<TextElement>* elements() { return &elements_; }
|
||||
private:
|
||||
ZoneList<TextElement> elements_;
|
||||
};
|
||||
|
||||
|
||||
class RegExpAssertion: public RegExpTree {
|
||||
public:
|
||||
enum Type {
|
||||
START_OF_LINE, START_OF_INPUT, END_OF_LINE, END_OF_INPUT,
|
||||
BOUNDARY, NON_BOUNDARY
|
||||
};
|
||||
explicit RegExpAssertion(Type type) : type_(type) { }
|
||||
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
||||
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success,
|
||||
RegExpNode* on_failure);
|
||||
virtual RegExpAssertion* AsAssertion();
|
||||
virtual bool IsAssertion();
|
||||
Type type() { return type_; }
|
||||
private:
|
||||
Type type_;
|
||||
};
|
||||
|
||||
|
||||
class RegExpCharacterClass: public RegExpTree {
|
||||
public:
|
||||
RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated)
|
||||
: ranges_(ranges),
|
||||
is_negated_(is_negated) { }
|
||||
explicit RegExpCharacterClass(uc16 type)
|
||||
: ranges_(new ZoneList<CharacterRange>(2)),
|
||||
is_negated_(false) {
|
||||
CharacterRange::AddClassEscape(type, ranges_);
|
||||
}
|
||||
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
||||
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success,
|
||||
RegExpNode* on_failure);
|
||||
virtual RegExpCharacterClass* AsCharacterClass();
|
||||
virtual bool IsCharacterClass();
|
||||
virtual bool IsTextElement() { return true; }
|
||||
virtual void AppendToText(RegExpText* text);
|
||||
ZoneList<CharacterRange>* ranges() { return ranges_; }
|
||||
bool is_negated() { return is_negated_; }
|
||||
private:
|
||||
ZoneList<CharacterRange>* ranges_;
|
||||
bool is_negated_;
|
||||
};
|
||||
|
||||
|
||||
class RegExpAtom: public RegExpTree {
|
||||
public:
|
||||
explicit RegExpAtom(Vector<const uc16> data) : data_(data) { }
|
||||
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
||||
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success,
|
||||
RegExpNode* on_failure);
|
||||
virtual RegExpAtom* AsAtom();
|
||||
virtual bool IsAtom();
|
||||
virtual bool IsTextElement() { return true; }
|
||||
virtual void AppendToText(RegExpText* text);
|
||||
Vector<const uc16> data() { return data_; }
|
||||
private:
|
||||
Vector<const uc16> data_;
|
||||
};
|
||||
|
||||
|
||||
class RegExpQuantifier: public RegExpTree {
|
||||
public:
|
||||
RegExpQuantifier(int min, int max, bool is_greedy, RegExpTree* body)
|
||||
: min_(min),
|
||||
max_(max),
|
||||
is_greedy_(is_greedy),
|
||||
body_(body) { }
|
||||
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
||||
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success,
|
||||
RegExpNode* on_failure);
|
||||
static RegExpNode* ToNode(int min,
|
||||
int max,
|
||||
bool is_greedy,
|
||||
RegExpTree* body,
|
||||
RegExpCompiler* compiler,
|
||||
RegExpNode* on_success,
|
||||
RegExpNode* on_failure);
|
||||
virtual RegExpQuantifier* AsQuantifier();
|
||||
virtual bool IsQuantifier();
|
||||
int min() { return min_; }
|
||||
int max() { return max_; }
|
||||
bool is_greedy() { return is_greedy_; }
|
||||
RegExpTree* body() { return body_; }
|
||||
// We just use a very large integer value as infinity because 2^30
|
||||
// is infinite in practice.
|
||||
static const int kInfinity = (1 << 30);
|
||||
private:
|
||||
int min_;
|
||||
int max_;
|
||||
bool is_greedy_;
|
||||
RegExpTree* body_;
|
||||
};
|
||||
|
||||
|
||||
// Availability state recorded on a RegExpCapture.
enum CaptureAvailability {
  CAPTURE_AVAILABLE,
  CAPTURE_UNREACHABLE,
  CAPTURE_PERMANENTLY_UNREACHABLE
};
|
||||
|
||||
class RegExpCapture: public RegExpTree {
|
||||
public:
|
||||
explicit RegExpCapture(RegExpTree* body, int index)
|
||||
: body_(body), index_(index), available_(CAPTURE_AVAILABLE) { }
|
||||
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
||||
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success,
|
||||
RegExpNode* on_failure);
|
||||
static RegExpNode* ToNode(RegExpTree* body,
|
||||
int index,
|
||||
RegExpCompiler* compiler,
|
||||
RegExpNode* on_success,
|
||||
RegExpNode* on_failure);
|
||||
virtual RegExpCapture* AsCapture();
|
||||
virtual bool IsCapture();
|
||||
RegExpTree* body() { return body_; }
|
||||
int index() { return index_; }
|
||||
inline CaptureAvailability available() { return available_; }
|
||||
inline void set_available(CaptureAvailability availability) {
|
||||
available_ = availability;
|
||||
}
|
||||
static int StartRegister(int index) { return index * 2; }
|
||||
static int EndRegister(int index) { return index * 2 + 1; }
|
||||
private:
|
||||
RegExpTree* body_;
|
||||
int index_;
|
||||
CaptureAvailability available_;
|
||||
};
|
||||
|
||||
|
||||
class RegExpLookahead: public RegExpTree {
|
||||
public:
|
||||
RegExpLookahead(RegExpTree* body, bool is_positive)
|
||||
: body_(body),
|
||||
is_positive_(is_positive) { }
|
||||
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
||||
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success,
|
||||
RegExpNode* on_failure);
|
||||
virtual RegExpLookahead* AsLookahead();
|
||||
virtual bool IsLookahead();
|
||||
RegExpTree* body() { return body_; }
|
||||
bool is_positive() { return is_positive_; }
|
||||
private:
|
||||
RegExpTree* body_;
|
||||
bool is_positive_;
|
||||
};
|
||||
|
||||
|
||||
class RegExpBackReference: public RegExpTree {
|
||||
public:
|
||||
explicit RegExpBackReference(RegExpCapture* capture)
|
||||
: capture_(capture) { }
|
||||
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
||||
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success,
|
||||
RegExpNode* on_failure);
|
||||
virtual RegExpBackReference* AsBackReference();
|
||||
virtual bool IsBackReference();
|
||||
int index() { return capture_->index(); }
|
||||
RegExpCapture* capture() { return capture_; }
|
||||
private:
|
||||
RegExpCapture* capture_;
|
||||
};
|
||||
|
||||
|
||||
class RegExpEmpty: public RegExpTree {
|
||||
public:
|
||||
RegExpEmpty() { }
|
||||
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
||||
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success,
|
||||
RegExpNode* on_failure);
|
||||
virtual RegExpEmpty* AsEmpty();
|
||||
virtual bool IsEmpty();
|
||||
static RegExpEmpty* GetInstance() { return &kInstance; }
|
||||
private:
|
||||
static RegExpEmpty kInstance;
|
||||
};
|
||||
|
||||
|
||||
class RegExpVisitor BASE_EMBEDDED {
|
||||
public:
|
||||
virtual ~RegExpVisitor() { }
|
||||
#define MAKE_CASE(Name) \
|
||||
virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
|
||||
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
|
||||
#undef MAKE_CASE
|
||||
};
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Basic visitor
|
||||
// - leaf node visitors are abstract.
|
||||
|
@ -647,7 +647,7 @@ void Builtins::Setup(bool create_heap_objects) {
|
||||
// During startup it's OK to always allocate and defer GC to later.
|
||||
// This simplifies things because we don't need to retry.
|
||||
AlwaysAllocateScope __scope__;
|
||||
code = Heap::CreateCode(desc, NULL, flags);
|
||||
code = Heap::CreateCode(desc, NULL, flags, NULL);
|
||||
if (code->IsFailure()) {
|
||||
v8::internal::V8::FatalProcessOutOfMemory("CreateCode");
|
||||
}
|
||||
|
78
src/bytecodes-irregexp.h
Normal file
78
src/bytecodes-irregexp.h
Normal file
@ -0,0 +1,78 @@
|
||||
// Copyright 2008 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
#ifndef V8_BYTECODES_IRREGEXP_H_
|
||||
#define V8_BYTECODES_IRREGEXP_H_
|
||||
|
||||
namespace v8 { namespace internal {
|
||||
|
||||
// Table of the Irregexp bytecodes.  Each row is V(name, code, length); the
// trailing comment gives the mnemonic followed by its operands.
// NOTE(review): LOOKUP_MAP2, LOOKUP_MAP8 and LOOKUP_HI_MAP8 have a length of
// 99, which looks like a placeholder - confirm before relying on the
// BC_*_LENGTH constants generated for them.
#define BYTECODE_ITERATOR(V) \
V(BREAK,                    0,  1) /* break */ \
V(PUSH_CP,                  1,  5) /* push_cp offset32 */ \
V(PUSH_BT,                  2,  5) /* push_bt addr32 */ \
V(PUSH_REGISTER,            3,  2) /* push_register register_index */ \
V(SET_REGISTER_TO_CP,       4,  6) /* set_register_to_cp register_index offset32 */ \
V(SET_CP_TO_REGISTER,       5,  2) /* set_cp_to_register register_index */ \
V(SET_REGISTER_TO_SP,       6,  2) /* set_register_to_sp register_index */ \
V(SET_SP_TO_REGISTER,       7,  2) /* set_sp_to_register register_index */ \
V(SET_REGISTER,             8,  6) /* set_register register_index value32 */ \
V(ADVANCE_REGISTER,         9,  6) /* advance_register register_index value32 */ \
V(POP_CP,                  10,  1) /* pop_cp */ \
V(POP_BT,                  11,  1) /* pop_bt */ \
V(POP_REGISTER,            12,  2) /* pop_register register_index */ \
V(FAIL,                    13,  1) /* fail */ \
V(SUCCEED,                 14,  1) /* succeed */ \
V(ADVANCE_CP,              15,  5) /* advance_cp offset32 */ \
V(GOTO,                    16,  5) /* goto addr32 */ \
V(LOAD_CURRENT_CHAR,       17,  9) /* load offset32 addr32 */ \
V(CHECK_CHAR,              18,  7) /* check_char uc16 addr32 */ \
V(CHECK_NOT_CHAR,          19,  7) /* check_not_char uc16 addr32 */ \
V(OR_CHECK_NOT_CHAR,       20,  9) /* or_check_not_char uc16 uc16 addr32 */ \
V(MINUS_OR_CHECK_NOT_CHAR, 21,  9) /* minus_or_check_not_char uc16 uc16 addr32 */ \
V(CHECK_LT,                22,  7) /* check_lt uc16 addr32 */ \
V(CHECK_GT,                23,  7) /* check_gt uc16 addr32 */ \
V(CHECK_NOT_BACK_REF,      24,  6) /* check_not_back_ref capture_idx addr32 */ \
V(LOOKUP_MAP1,             25, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
V(LOOKUP_MAP2,             26, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
V(LOOKUP_MAP8,             27, 99) /* l_map8 start16 byte_map addr32* */ \
V(LOOKUP_HI_MAP8,          28, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_LT,       29,  8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_REGISTER_GE,       30,  8) /* check_reg_ge register_index value16 addr32 */
|
||||
|
||||
#define DECLARE_BYTECODES(name, code, length) \
|
||||
static const int BC_##name = code;
|
||||
BYTECODE_ITERATOR(DECLARE_BYTECODES)
|
||||
#undef DECLARE_BYTECODES
|
||||
|
||||
#define DECLARE_BYTECODE_LENGTH(name, code, length) \
|
||||
static const int BC_##name##_LENGTH = length;
|
||||
BYTECODE_ITERATOR(DECLARE_BYTECODE_LENGTH)
|
||||
#undef DECLARE_BYTECODE_LENGTH
|
||||
} }
|
||||
|
||||
#endif // V8_BYTECODES_IRREGEXP_H_
|
@ -237,12 +237,14 @@ template <int> class StaticAssertionHelper { };
|
||||
// The ASSERT macro is equivalent to CHECK except that it only
|
||||
// generates code in debug builds. Ditto STATIC_ASSERT.
|
||||
#ifdef DEBUG
|
||||
#define ASSERT_RESULT(expr) CHECK(expr)
|
||||
#define ASSERT(condition) CHECK(condition)
|
||||
#define ASSERT_EQ(v1, v2) CHECK_EQ(v1, v2)
|
||||
#define ASSERT_NE(v1, v2) CHECK_NE(v1, v2)
|
||||
#define STATIC_ASSERT(test) STATIC_CHECK(test)
|
||||
#define SLOW_ASSERT(condition) if (FLAG_enable_slow_asserts) CHECK(condition)
|
||||
#else
|
||||
#define ASSERT_RESULT(expr) (expr)
|
||||
#define ASSERT(condition) ((void) 0)
|
||||
#define ASSERT_EQ(v1, v2) ((void) 0)
|
||||
#define ASSERT_NE(v1, v2) ((void) 0)
|
||||
@ -256,4 +258,6 @@ template <int> class StaticAssertionHelper { };
|
||||
|
||||
#define ASSERT_SIZE_TAG_ALIGNED(size) ASSERT((size & kHeapObjectTagMask) == 0)
|
||||
|
||||
#define ASSERT_NOT_NULL(p) ASSERT_NE(NULL, p)
|
||||
|
||||
#endif // V8_CHECKS_H_
|
||||
|
@ -120,7 +120,7 @@ typedef int32_t instr_t;
|
||||
// bits.
|
||||
//
|
||||
// bool InstructionSetsConditionCodes(byte* ptr) {
|
||||
// Instr *instr = Instr::At(ptr);
|
||||
// Instr* instr = Instr::At(ptr);
|
||||
// int type = instr->TypeField();
|
||||
// return ((type == 0) || (type == 1)) && instr->HasS();
|
||||
// }
|
||||
|
@ -170,9 +170,9 @@ Handle<Proxy> Factory::NewProxy(const AccessorDescriptor* desc) {
|
||||
}
|
||||
|
||||
|
||||
Handle<ByteArray> Factory::NewByteArray(int length) {
|
||||
Handle<ByteArray> Factory::NewByteArray(int length, PretenureFlag pretenure) {
|
||||
ASSERT(0 <= length);
|
||||
CALL_HEAP_FUNCTION(Heap::AllocateByteArray(length), ByteArray);
|
||||
CALL_HEAP_FUNCTION(Heap::AllocateByteArray(length, pretenure), ByteArray);
|
||||
}
|
||||
|
||||
|
||||
@ -457,9 +457,15 @@ Handle<JSFunction> Factory::NewFunctionWithPrototype(Handle<String> name,
|
||||
}
|
||||
|
||||
|
||||
// Creates a Code object through Heap::CreateCode, passing the location of
// |self_ref| (viewed as a Code**) as the self-reference slot.
Handle<Code> Factory::NewCode(const CodeDesc& desc, ScopeInfo<>* sinfo,
                              Code::Flags flags, Handle<Object> self_ref) {
  Code** self_slot = reinterpret_cast<Code**>(self_ref.location());
  CALL_HEAP_FUNCTION(Heap::CreateCode(desc, sinfo, flags, self_slot), Code);
}
|
||||
|
||||
Handle<Code> Factory::NewCode(const CodeDesc& desc, ScopeInfo<>* sinfo,
|
||||
Code::Flags flags) {
|
||||
CALL_HEAP_FUNCTION(Heap::CreateCode(desc, sinfo, flags), Code);
|
||||
CALL_HEAP_FUNCTION(Heap::CreateCode(desc, sinfo, flags, NULL), Code);
|
||||
}
|
||||
|
||||
|
||||
@ -706,8 +712,11 @@ Handle<JSFunction> Factory::CreateApiFunction(
|
||||
ASSERT(type != INVALID_TYPE);
|
||||
|
||||
Handle<JSFunction> result =
|
||||
Factory::NewFunction(Factory::empty_symbol(), type, instance_size,
|
||||
code, true);
|
||||
Factory::NewFunction(Factory::empty_symbol(),
|
||||
type,
|
||||
instance_size,
|
||||
code,
|
||||
true);
|
||||
// Set class name.
|
||||
Handle<Object> class_name = Handle<Object>(obj->class_name());
|
||||
if (class_name->IsString()) {
|
||||
|
@ -147,7 +147,8 @@ class Factory : public AllStatic {
|
||||
// the old generation).
|
||||
static Handle<Proxy> NewProxy(const AccessorDescriptor* proxy);
|
||||
|
||||
static Handle<ByteArray> NewByteArray(int length);
|
||||
static Handle<ByteArray> NewByteArray(int length,
|
||||
PretenureFlag pretenure = NOT_TENURED);
|
||||
|
||||
static Handle<Map> NewMap(InstanceType type, int instance_size);
|
||||
|
||||
@ -205,6 +206,9 @@ class Factory : public AllStatic {
|
||||
Handle<JSFunction> boilerplate,
|
||||
Handle<Context> context);
|
||||
|
||||
static Handle<Code> NewCode(const CodeDesc& desc, ScopeInfo<>* sinfo,
|
||||
Code::Flags flags, Handle<Object> self_reference);
|
||||
|
||||
static Handle<Code> NewCode(const CodeDesc& desc, ScopeInfo<>* sinfo,
|
||||
Code::Flags flags);
|
||||
|
||||
|
@ -289,6 +289,12 @@ DEFINE_bool(collect_heap_spill_statistics, false,
|
||||
"report heap spill statistics along with heap_stats "
|
||||
"(requires heap_stats)")
|
||||
|
||||
// Flags for the Irregexp regular expression implementation.
DEFINE_bool(irregexp, false, "new regular expression code")
DEFINE_bool(trace_regexps, false, "trace Irregexp execution")
DEFINE_bool(trace_regexp_bytecodes, false,
            "trace Irregexp bytecode execution")
DEFINE_bool(attempt_case_independent, false,
            "attempt to run Irregexp case independent")
DEFINE_bool(irregexp_native, false,
            "use native code Irregexp implementation (IA32 only)")
|
||||
|
||||
//
|
||||
// Logging and profiling only flags
|
||||
//
|
||||
|
@ -178,10 +178,16 @@ class Map;
|
||||
class MapSpace;
|
||||
class MarkCompactCollector;
|
||||
class NewSpace;
|
||||
class NodeVisitor;
|
||||
class Object;
|
||||
class OldSpace;
|
||||
class Property;
|
||||
class Proxy;
|
||||
class RegExpNode;
|
||||
struct RegExpParseResult;
|
||||
class RegExpTree;
|
||||
class RegExpCompiler;
|
||||
class RegExpVisitor;
|
||||
class Scope;
|
||||
template<class Allocator = FreeStoreAllocationPolicy> class ScopeInfo;
|
||||
class Script;
|
||||
|
43
src/heap.cc
43
src/heap.cc
@ -392,8 +392,7 @@ void Heap::PerformGarbageCollection(AllocationSpace space,
|
||||
}
|
||||
Counters::objs_since_last_young.Set(0);
|
||||
|
||||
// Process weak handles post gc.
|
||||
GlobalHandles::PostGarbageCollectionProcessing();
|
||||
PostGarbageCollectionProcessing();
|
||||
|
||||
if (collector == MARK_COMPACTOR) {
|
||||
// Register the amount of external allocated memory.
|
||||
@ -408,6 +407,14 @@ void Heap::PerformGarbageCollection(AllocationSpace space,
|
||||
}
|
||||
|
||||
|
||||
// Runs the post-collection hooks in order: first the global handles' (weak
// handle processing), then the flat string readers' update.
void Heap::PostGarbageCollectionProcessing() {
  GlobalHandles::PostGarbageCollectionProcessing();  // Weak handles.
  FlatStringReader::PostGarbageCollectionProcessing();  // String readers.
}
|
||||
|
||||
|
||||
void Heap::MarkCompact(GCTracer* tracer) {
|
||||
gc_state_ = MARK_COMPACT;
|
||||
mc_count_++;
|
||||
@ -1582,6 +1589,24 @@ Object* Heap::LookupSingleCharacterStringFromCode(uint16_t code) {
|
||||
}
|
||||
|
||||
|
||||
// Allocates a byte array of |length| bytes.  NOT_TENURED requests are
// forwarded to the single-argument overload.  Otherwise the array goes into
// large object space when its size exceeds MaxHeapObjectSize() and into old
// data space when it does not.  Returns the failure object unchanged if the
// raw allocation fails.
Object* Heap::AllocateByteArray(int length, PretenureFlag pretenure) {
  if (pretenure == NOT_TENURED) return AllocateByteArray(length);

  const int byte_size = ByteArray::SizeFor(length);
  AllocationSpace target_space;
  if (byte_size > MaxHeapObjectSize()) {
    target_space = LO_SPACE;
  } else {
    target_space = OLD_DATA_SPACE;
  }

  Object* allocation = AllocateRaw(byte_size, target_space, OLD_DATA_SPACE);
  if (allocation->IsFailure()) return allocation;

  // Initialize the header fields of the freshly allocated array.
  Array* array = reinterpret_cast<Array*>(allocation);
  array->set_map(byte_array_map());
  array->set_length(length);
  return allocation;
}
|
||||
|
||||
|
||||
Object* Heap::AllocateByteArray(int length) {
|
||||
int size = ByteArray::SizeFor(length);
|
||||
AllocationSpace space =
|
||||
@ -1599,7 +1624,8 @@ Object* Heap::AllocateByteArray(int length) {
|
||||
|
||||
Object* Heap::CreateCode(const CodeDesc& desc,
|
||||
ScopeInfo<>* sinfo,
|
||||
Code::Flags flags) {
|
||||
Code::Flags flags,
|
||||
Code** self_reference) {
|
||||
// Compute size
|
||||
int body_size = RoundUp(desc.instr_size + desc.reloc_size, kObjectAlignment);
|
||||
int sinfo_size = 0;
|
||||
@ -1622,7 +1648,16 @@ Object* Heap::CreateCode(const CodeDesc& desc,
|
||||
code->set_sinfo_size(sinfo_size);
|
||||
code->set_flags(flags);
|
||||
code->set_ic_flag(Code::IC_TARGET_IS_ADDRESS);
|
||||
code->CopyFrom(desc); // migrate generated code
|
||||
// Allow self references to created code object.
|
||||
if (self_reference != NULL) {
|
||||
*self_reference = code;
|
||||
}
|
||||
// Migrate generated code.
|
||||
// The generated code can contain Object** values (typically from handles)
|
||||
// that are dereferenced during the copy to point directly to the actual heap
|
||||
// objects. These pointers can include references to the code object itself,
|
||||
// through the self_reference parameter.
|
||||
code->CopyFrom(desc);
|
||||
if (sinfo != NULL) sinfo->Serialize(code); // write scope info
|
||||
|
||||
#ifdef DEBUG
|
||||
|
18
src/heap.h
18
src/heap.h
@ -391,7 +391,13 @@ class Heap : public AllStatic {
|
||||
// Allocate a byte array of the specified length
|
||||
// Returns Failure::RetryAfterGC(requested_bytes, space) if the allocation
|
||||
// failed.
|
||||
// Please not this does not perform a garbage collection.
|
||||
// Please note this does not perform a garbage collection.
|
||||
static Object* AllocateByteArray(int length, PretenureFlag pretenure);
|
||||
|
||||
// Allocate a non-tenured byte array of the specified length
|
||||
// Returns Failure::RetryAfterGC(requested_bytes, space) if the allocation
|
||||
// failed.
|
||||
// Please note this does not perform a garbage collection.
|
||||
static Object* AllocateByteArray(int length);
|
||||
|
||||
// Allocates a fixed array initialized with undefined values
|
||||
@ -549,11 +555,14 @@ class Heap : public AllStatic {
|
||||
|
||||
// Makes a new native code object
|
||||
// Returns Failure::RetryAfterGC(requested_bytes, space) if the allocation
|
||||
// failed.
|
||||
// failed. On success, the pointer to the Code object is stored in the
|
||||
// self_reference. This allows generated code to reference its own Code
|
||||
// object by containing this pointer.
|
||||
// Please note this function does not perform a garbage collection.
|
||||
static Object* CreateCode(const CodeDesc& desc,
|
||||
ScopeInfo<>* sinfo,
|
||||
Code::Flags flags);
|
||||
Code::Flags flags,
|
||||
Code** self_reference = NULL);
|
||||
|
||||
static Object* CopyCode(Code* code);
|
||||
// Finds the symbol for string in the symbol table.
|
||||
@ -582,6 +591,9 @@ class Heap : public AllStatic {
|
||||
static void GarbageCollectionPrologue();
|
||||
static void GarbageCollectionEpilogue();
|
||||
|
||||
// Code that should be executed after the garbage collection proper.
|
||||
static void PostGarbageCollectionProcessing();
|
||||
|
||||
// Performs garbage collection operation.
|
||||
// Returns whether required_space bytes are available after the collection.
|
||||
static bool CollectGarbage(int required_space, AllocationSpace space);
|
||||
|
347
src/interpreter-irregexp.cc
Normal file
347
src/interpreter-irregexp.cc
Normal file
@ -0,0 +1,347 @@
|
||||
// Copyright 2008 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// A simple interpreter for the Irregexp byte code.
|
||||
|
||||
|
||||
#include "v8.h"
|
||||
#include "utils.h"
|
||||
#include "ast.h"
|
||||
#include "bytecodes-irregexp.h"
|
||||
#include "interpreter-irregexp.h"
|
||||
|
||||
|
||||
namespace v8 { namespace internal {
|
||||
|
||||
|
||||
#ifdef DEBUG
// Prints one trace line for the bytecode at |pc| when
// FLAG_trace_regexp_bytecodes is set.
//
//   code_base         start of the bytecode array; used to print |pc| as an
//                     offset.
//   pc                address of the bytecode about to be executed.
//   stack_depth       number of entries currently on the backtrack stack.
//   current_position  current position in the subject string.
//   bytecode_length   total length of the bytecode in bytes, opcode included.
//   bytecode_name     printable name of the bytecode.
//
// After the summary, the operand bytes pc[1] .. pc[bytecode_length - 1] are
// printed in hex.
static void TraceInterpreter(const byte* code_base,
                             const byte* pc,
                             int stack_depth,
                             int current_position,
                             int bytecode_length,
                             const char* bytecode_name) {
  if (!FLAG_trace_regexp_bytecodes) return;
  // A pointer difference has type ptrdiff_t, which does not match the %02x
  // conversion; convert explicitly so the varargs call is well defined.
  PrintF("pc = %02x, sp = %d, current = %d, bc = %s",
         static_cast<unsigned>(pc - code_base),
         stack_depth,
         current_position,
         bytecode_name);
  // Use PrintF for the operand bytes as well so that the whole trace line
  // goes through the same output routine.
  for (int i = 1; i < bytecode_length; i++) {
    PrintF(", %02x", static_cast<unsigned>(pc[i]));
  }
  PrintF("\n");
}


// In debug builds every bytecode case label also emits a trace line.  The
// macro relies on the interpreter's local variables code_base, pc,
// backtrack_sp, backtrack_stack and current being in scope.
# define BYTECODE(name) case BC_##name:                                   \
    TraceInterpreter(code_base,                                           \
                     pc,                                                  \
                     static_cast<int>(backtrack_sp - backtrack_stack),    \
                     current,                                             \
                     BC_##name##_LENGTH,                                  \
                     #name);
#else
# define BYTECODE(name) case BC_##name:  // NOLINT
#endif
|
||||
|
||||
|
||||
|
||||
static bool RawMatch(const byte* code_base,
|
||||
Vector<const uc16> subject,
|
||||
int* registers,
|
||||
int current) {
|
||||
const byte* pc = code_base;
|
||||
static const int kBacktrackStackSize = 10000;
|
||||
int backtrack_stack[kBacktrackStackSize];
|
||||
int backtrack_stack_space = kBacktrackStackSize;
|
||||
int* backtrack_sp = backtrack_stack;
|
||||
int current_char = -1;
|
||||
#ifdef DEBUG
|
||||
if (FLAG_trace_regexp_bytecodes) {
|
||||
PrintF("\n\nStart bytecode interpreter\n\n");
|
||||
}
|
||||
#endif
|
||||
while (true) {
|
||||
switch (*pc) {
|
||||
BYTECODE(BREAK)
|
||||
UNREACHABLE();
|
||||
return false;
|
||||
BYTECODE(PUSH_CP)
|
||||
if (--backtrack_stack_space < 0) {
|
||||
return false; // No match on backtrack stack overflow.
|
||||
}
|
||||
*backtrack_sp++ = current + Load32(pc + 1);
|
||||
pc += BC_PUSH_CP_LENGTH;
|
||||
break;
|
||||
BYTECODE(PUSH_BT)
|
||||
if (--backtrack_stack_space < 0) {
|
||||
return false; // No match on backtrack stack overflow.
|
||||
}
|
||||
*backtrack_sp++ = Load32(pc + 1);
|
||||
pc += BC_PUSH_BT_LENGTH;
|
||||
break;
|
||||
BYTECODE(PUSH_REGISTER)
|
||||
if (--backtrack_stack_space < 0) {
|
||||
return false; // No match on backtrack stack overflow.
|
||||
}
|
||||
*backtrack_sp++ = registers[pc[1]];
|
||||
pc += BC_PUSH_REGISTER_LENGTH;
|
||||
break;
|
||||
BYTECODE(SET_REGISTER)
|
||||
registers[pc[1]] = Load32(pc + 2);
|
||||
pc += BC_SET_REGISTER_LENGTH;
|
||||
break;
|
||||
BYTECODE(ADVANCE_REGISTER)
|
||||
registers[pc[1]] += Load32(pc + 2);
|
||||
pc += BC_ADVANCE_REGISTER_LENGTH;
|
||||
break;
|
||||
BYTECODE(SET_REGISTER_TO_CP)
|
||||
registers[pc[1]] = current + Load32(pc + 2);
|
||||
pc += BC_SET_REGISTER_TO_CP_LENGTH;
|
||||
break;
|
||||
BYTECODE(SET_CP_TO_REGISTER)
|
||||
current = registers[pc[1]];
|
||||
pc += BC_SET_CP_TO_REGISTER_LENGTH;
|
||||
break;
|
||||
BYTECODE(SET_REGISTER_TO_SP)
|
||||
registers[pc[1]] = backtrack_sp - backtrack_stack;
|
||||
pc += BC_SET_REGISTER_TO_SP_LENGTH;
|
||||
break;
|
||||
BYTECODE(SET_SP_TO_REGISTER)
|
||||
backtrack_sp = backtrack_stack + registers[pc[1]];
|
||||
backtrack_stack_space = kBacktrackStackSize -
|
||||
(backtrack_sp - backtrack_stack);
|
||||
pc += BC_SET_SP_TO_REGISTER_LENGTH;
|
||||
break;
|
||||
BYTECODE(POP_CP)
|
||||
backtrack_stack_space++;
|
||||
--backtrack_sp;
|
||||
current = *backtrack_sp;
|
||||
pc += BC_POP_CP_LENGTH;
|
||||
break;
|
||||
BYTECODE(POP_BT)
|
||||
backtrack_stack_space++;
|
||||
--backtrack_sp;
|
||||
pc = code_base + *backtrack_sp;
|
||||
break;
|
||||
BYTECODE(POP_REGISTER)
|
||||
backtrack_stack_space++;
|
||||
--backtrack_sp;
|
||||
registers[pc[1]] = *backtrack_sp;
|
||||
pc += BC_POP_REGISTER_LENGTH;
|
||||
break;
|
||||
BYTECODE(FAIL)
|
||||
return false;
|
||||
BYTECODE(SUCCEED)
|
||||
return true;
|
||||
BYTECODE(ADVANCE_CP)
|
||||
current += Load32(pc + 1);
|
||||
pc += BC_ADVANCE_CP_LENGTH;
|
||||
break;
|
||||
BYTECODE(GOTO)
|
||||
pc = code_base + Load32(pc + 1);
|
||||
break;
|
||||
BYTECODE(LOAD_CURRENT_CHAR) {
|
||||
int pos = current + Load32(pc + 1);
|
||||
if (pos >= subject.length()) {
|
||||
pc = code_base + Load32(pc + 5);
|
||||
} else {
|
||||
current_char = subject[pos];
|
||||
pc += BC_LOAD_CURRENT_CHAR_LENGTH;
|
||||
}
|
||||
break;
|
||||
}
|
||||
BYTECODE(CHECK_CHAR) {
|
||||
int c = Load16(pc + 1);
|
||||
if (c == current_char) {
|
||||
pc = code_base + Load32(pc + 3);
|
||||
} else {
|
||||
pc += BC_CHECK_CHAR_LENGTH;
|
||||
}
|
||||
break;
|
||||
}
|
||||
BYTECODE(CHECK_NOT_CHAR) {
|
||||
int c = Load16(pc + 1);
|
||||
if (c != current_char) {
|
||||
pc = code_base + Load32(pc + 3);
|
||||
} else {
|
||||
pc += BC_CHECK_NOT_CHAR_LENGTH;
|
||||
}
|
||||
break;
|
||||
}
|
||||
BYTECODE(OR_CHECK_NOT_CHAR) {
|
||||
int c = Load16(pc + 1);
|
||||
if (c != (current_char | Load16(pc + 3))) {
|
||||
pc = code_base + Load32(pc + 5);
|
||||
} else {
|
||||
pc += BC_OR_CHECK_NOT_CHAR_LENGTH;
|
||||
}
|
||||
break;
|
||||
}
|
||||
BYTECODE(MINUS_OR_CHECK_NOT_CHAR) {
|
||||
int c = Load16(pc + 1);
|
||||
int m = Load16(pc + 3);
|
||||
if (c != ((current_char - m) | m)) {
|
||||
pc = code_base + Load32(pc + 5);
|
||||
} else {
|
||||
pc += BC_MINUS_OR_CHECK_NOT_CHAR_LENGTH;
|
||||
}
|
||||
break;
|
||||
}
|
||||
BYTECODE(CHECK_LT) {
|
||||
int limit = Load16(pc + 1);
|
||||
if (current_char < limit) {
|
||||
pc = code_base + Load32(pc + 3);
|
||||
} else {
|
||||
pc += BC_CHECK_LT_LENGTH;
|
||||
}
|
||||
break;
|
||||
}
|
||||
BYTECODE(CHECK_GT) {
|
||||
int limit = Load16(pc + 1);
|
||||
if (current_char > limit) {
|
||||
pc = code_base + Load32(pc + 3);
|
||||
} else {
|
||||
pc += BC_CHECK_GT_LENGTH;
|
||||
}
|
||||
break;
|
||||
}
|
||||
BYTECODE(CHECK_REGISTER_LT)
|
||||
if (registers[pc[1]] < Load16(pc + 2)) {
|
||||
pc = code_base + Load32(pc + 4);
|
||||
} else {
|
||||
pc += BC_CHECK_REGISTER_LT_LENGTH;
|
||||
}
|
||||
break;
|
||||
BYTECODE(CHECK_REGISTER_GE)
|
||||
if (registers[pc[1]] >= Load16(pc + 2)) {
|
||||
pc = code_base + Load32(pc + 4);
|
||||
} else {
|
||||
pc += BC_CHECK_REGISTER_GE_LENGTH;
|
||||
}
|
||||
break;
|
||||
BYTECODE(LOOKUP_MAP1) {
|
||||
// Look up character in a bitmap. If we find a 0, then jump to the
|
||||
// location at pc + 7. Otherwise fall through!
|
||||
int index = current_char - Load16(pc + 1);
|
||||
byte map = code_base[Load32(pc + 3) + (index >> 3)];
|
||||
map = ((map >> (index & 7)) & 1);
|
||||
if (map == 0) {
|
||||
pc = code_base + Load32(pc + 7);
|
||||
} else {
|
||||
pc += BC_LOOKUP_MAP1_LENGTH;
|
||||
}
|
||||
break;
|
||||
}
|
||||
BYTECODE(LOOKUP_MAP2) {
|
||||
// Look up character in a half-nibble map. If we find 00, then jump to
|
||||
// the location at pc + 7. If we find 01 then jump to location at
|
||||
// pc + 11, etc.
|
||||
int index = (current_char - Load16(pc + 1)) << 1;
|
||||
byte map = code_base[Load32(pc + 3) + (index >> 3)];
|
||||
map = ((map >> (index & 7)) & 3);
|
||||
if (map < 2) {
|
||||
if (map == 0) {
|
||||
pc = code_base + Load32(pc + 7);
|
||||
} else {
|
||||
pc = code_base + Load32(pc + 11);
|
||||
}
|
||||
} else {
|
||||
if (map == 2) {
|
||||
pc = code_base + Load32(pc + 15);
|
||||
} else {
|
||||
pc = code_base + Load32(pc + 19);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
BYTECODE(LOOKUP_MAP8) {
|
||||
// Look up character in a byte map. Use the byte as an index into a
|
||||
// table that follows this instruction immediately.
|
||||
int index = current_char - Load16(pc + 1);
|
||||
byte map = code_base[Load32(pc + 3) + index];
|
||||
const byte* new_pc = code_base + Load32(pc + 7) + (map << 2);
|
||||
pc = code_base + Load32(new_pc);
|
||||
break;
|
||||
}
|
||||
BYTECODE(LOOKUP_HI_MAP8) {
|
||||
// Look up high byte of this character in a byte map. Use the byte as
|
||||
// an index into a table that follows this instruction immediately.
|
||||
int index = (current_char >> 8) - pc[1];
|
||||
byte map = code_base[Load32(pc + 2) + index];
|
||||
const byte* new_pc = code_base + Load32(pc + 6) + (map << 2);
|
||||
pc = code_base + Load32(new_pc);
|
||||
break;
|
||||
}
|
||||
BYTECODE(CHECK_NOT_BACK_REF) {
|
||||
int from = registers[pc[1]];
|
||||
int len = registers[pc[1] + 1] - from;
|
||||
if (current + len > subject.length()) {
|
||||
pc = code_base + Load32(pc + 2);
|
||||
break;
|
||||
} else {
|
||||
int i;
|
||||
for (i = 0; i < len; i++) {
|
||||
if (subject[from + i] != subject[current + i]) {
|
||||
pc = code_base + Load32(pc + 2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i < len) break;
|
||||
current += len;
|
||||
}
|
||||
pc += BC_CHECK_NOT_BACK_REF_LENGTH;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Runs the bytecode held in |code_array| over the characters of |subject16|,
// starting at |start_position|, and returns whether it matched.  The subject
// is expected to be a flat string in two-byte representation; |registers| is
// handed to the interpreter as its register file.
bool IrregexpInterpreter::Match(Handle<ByteArray> code_array,
                                Handle<String> subject16,
                                int* registers,
                                int start_position) {
  ASSERT(StringShape(*subject16).IsTwoByteRepresentation());
  ASSERT(subject16->IsFlat(StringShape(*subject16)));

  // Raw pointers into the bytecode array and the string are used from here
  // on.  NOTE(review): presumably that is why allocation is forbidden for
  // the rest of this scope -- confirm.
  AssertNoAllocation no_allocation;
  const byte* const bytecode_start = code_array->GetDataStartAddress();
  Vector<const uc16> subject_chars(subject16->GetTwoByteData(),
                                   subject16->length());
  return RawMatch(bytecode_start, subject_chars, registers, start_position);
}
|
||||
|
||||
} } // namespace v8::internal
|
47
src/interpreter-irregexp.h
Normal file
47
src/interpreter-irregexp.h
Normal file
@ -0,0 +1,47 @@
|
||||
// Copyright 2008 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// A simple interpreter for the Irregexp byte code.
|
||||
|
||||
#ifndef V8_INTERPRETER_IRREGEXP_H_
|
||||
#define V8_INTERPRETER_IRREGEXP_H_
|
||||
|
||||
namespace v8 { namespace internal {
|
||||
|
||||
|
||||
class IrregexpInterpreter {
|
||||
public:
|
||||
static bool Match(Handle<ByteArray> code,
|
||||
Handle<String> subject16,
|
||||
int* captures,
|
||||
int start_position);
|
||||
};
|
||||
|
||||
|
||||
} } // namespace v8::internal
|
||||
|
||||
#endif // V8_INTERPRETER_IRREGEXP_H_
|
266
src/jsregexp-inl.h
Normal file
266
src/jsregexp-inl.h
Normal file
@ -0,0 +1,266 @@
|
||||
// Copyright 2006-2008 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef V8_JSREGEXP_INL_H_
|
||||
#define V8_JSREGEXP_INL_H_
|
||||
|
||||
|
||||
#include "jsregexp.h"
|
||||
#include "regexp-macro-assembler.h"
|
||||
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
|
||||
template <typename C>
|
||||
bool ZoneSplayTree<C>::Insert(const Key& key, Locator* locator) {
|
||||
if (is_empty()) {
|
||||
// If the tree is empty, insert the new node.
|
||||
root_ = new Node(key, C::kNoValue);
|
||||
} else {
|
||||
// Splay on the key to move the last node on the search path
|
||||
// for the key to the root of the tree.
|
||||
Splay(key);
|
||||
// Ignore repeated insertions with the same key.
|
||||
int cmp = C::Compare(key, root_->key_);
|
||||
if (cmp == 0) {
|
||||
locator->bind(root_);
|
||||
return false;
|
||||
}
|
||||
// Insert the new node.
|
||||
Node* node = new Node(key, C::kNoValue);
|
||||
if (cmp > 0) {
|
||||
node->left_ = root_;
|
||||
node->right_ = root_->right_;
|
||||
root_->right_ = NULL;
|
||||
} else {
|
||||
node->right_ = root_;
|
||||
node->left_ = root_->left_;
|
||||
root_->left_ = NULL;
|
||||
}
|
||||
root_ = node;
|
||||
}
|
||||
locator->bind(root_);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template <typename C>
|
||||
bool ZoneSplayTree<C>::Find(const Key& key, Locator* locator) {
|
||||
if (is_empty())
|
||||
return false;
|
||||
Splay(key);
|
||||
if (C::Compare(key, root_->key_) == 0) {
|
||||
locator->bind(root_);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename C>
|
||||
bool ZoneSplayTree<C>::FindGreatestLessThan(const Key& key,
|
||||
Locator* locator) {
|
||||
if (is_empty())
|
||||
return false;
|
||||
// Splay on the key to move the node with the given key or the last
|
||||
// node on the search path to the top of the tree.
|
||||
Splay(key);
|
||||
// Now the result is either the root node or the greatest node in
|
||||
// the left subtree.
|
||||
int cmp = C::Compare(root_->key_, key);
|
||||
if (cmp <= 0) {
|
||||
locator->bind(root_);
|
||||
return true;
|
||||
} else {
|
||||
Node* temp = root_;
|
||||
root_ = root_->left_;
|
||||
bool result = FindGreatest(locator);
|
||||
root_ = temp;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename C>
|
||||
bool ZoneSplayTree<C>::FindLeastGreaterThan(const Key& key,
|
||||
Locator* locator) {
|
||||
if (is_empty())
|
||||
return false;
|
||||
// Splay on the key to move the node with the given key or the last
|
||||
// node on the search path to the top of the tree.
|
||||
Splay(key);
|
||||
// Now the result is either the root node or the least node in
|
||||
// the right subtree.
|
||||
int cmp = C::Compare(root_->key_, key);
|
||||
if (cmp >= 0) {
|
||||
locator->bind(root_);
|
||||
return true;
|
||||
} else {
|
||||
Node* temp = root_;
|
||||
root_ = root_->right_;
|
||||
bool result = FindLeast(locator);
|
||||
root_ = temp;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename C>
|
||||
bool ZoneSplayTree<C>::FindGreatest(Locator* locator) {
|
||||
if (is_empty())
|
||||
return false;
|
||||
Node* current = root_;
|
||||
while (current->right_ != NULL)
|
||||
current = current->right_;
|
||||
locator->bind(current);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template <typename C>
|
||||
bool ZoneSplayTree<C>::FindLeast(Locator* locator) {
|
||||
if (is_empty())
|
||||
return false;
|
||||
Node* current = root_;
|
||||
while (current->left_ != NULL)
|
||||
current = current->left_;
|
||||
locator->bind(current);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template <typename C>
|
||||
bool ZoneSplayTree<C>::Remove(const Key& key) {
|
||||
// Bail if the tree is empty
|
||||
if (is_empty())
|
||||
return false;
|
||||
// Splay on the key to move the node with the given key to the top.
|
||||
Splay(key);
|
||||
// Bail if the key is not in the tree
|
||||
if (C::Compare(key, root_->key_) != 0)
|
||||
return false;
|
||||
if (root_->left_ == NULL) {
|
||||
// No left child, so the new tree is just the right child.
|
||||
root_ = root_->right_;
|
||||
} else {
|
||||
// Left child exists.
|
||||
Node* right = root_->right_;
|
||||
// Make the original left child the new root.
|
||||
root_ = root_->left_;
|
||||
// Splay to make sure that the new root has an empty right child.
|
||||
Splay(key);
|
||||
// Insert the original right child as the right child of the new
|
||||
// root.
|
||||
root_->right_ = right;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template <typename C>
|
||||
void ZoneSplayTree<C>::Splay(const Key& key) {
|
||||
if (is_empty())
|
||||
return;
|
||||
Node dummy_node(C::kNoKey, C::kNoValue);
|
||||
// Create a dummy node. The use of the dummy node is a bit
|
||||
// counter-intuitive: The right child of the dummy node will hold
|
||||
// the L tree of the algorithm. The left child of the dummy node
|
||||
// will hold the R tree of the algorithm. Using a dummy node, left
|
||||
// and right will always be nodes and we avoid special cases.
|
||||
Node* dummy = &dummy_node;
|
||||
Node* left = dummy;
|
||||
Node* right = dummy;
|
||||
Node* current = root_;
|
||||
while (true) {
|
||||
int cmp = C::Compare(key, current->key_);
|
||||
if (cmp < 0) {
|
||||
if (current->left_ == NULL)
|
||||
break;
|
||||
if (C::Compare(key, current->left_->key_) < 0) {
|
||||
// Rotate right.
|
||||
Node* temp = current->left_;
|
||||
current->left_ = temp->right_;
|
||||
temp->right_ = current;
|
||||
current = temp;
|
||||
if (current->left_ == NULL)
|
||||
break;
|
||||
}
|
||||
// Link right.
|
||||
right->left_ = current;
|
||||
right = current;
|
||||
current = current->left_;
|
||||
} else if (cmp > 0) {
|
||||
if (current->right_ == NULL)
|
||||
break;
|
||||
if (C::Compare(key, current->right_->key_) > 0) {
|
||||
// Rotate left.
|
||||
Node* temp = current->right_;
|
||||
current->right_ = temp->left_;
|
||||
temp->left_ = current;
|
||||
current = temp;
|
||||
if (current->right_ == NULL)
|
||||
break;
|
||||
}
|
||||
// Link left.
|
||||
left->right_ = current;
|
||||
left = current;
|
||||
current = current->right_;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Assemble.
|
||||
left->right_ = current->left_;
|
||||
right->left_ = current->right_;
|
||||
current->left_ = dummy->right_;
|
||||
current->right_ = dummy->left_;
|
||||
root_ = current;
|
||||
}
|
||||
|
||||
|
||||
// Visits the subtree rooted at |node| in order (left subtree, node, right
// subtree), invoking callback->Call(key, value) for every node.  A NULL
// |node| is an empty subtree and results in no calls.
template <typename Node, class Callback>
static void DoForEach(Node* node, Callback* callback) {
  // Recurse into left subtrees only; the walk down the right spine is done
  // by the loop.
  for (Node* cursor = node; cursor != NULL; cursor = cursor->right()) {
    DoForEach<Node, Callback>(cursor->left(), callback);
    callback->Call(cursor->key(), cursor->value());
  }
}
|
||||
|
||||
|
||||
// Binds this node's label in |macro|.
//
// This is a non-template function defined in a header, so it is marked
// inline: without that, every translation unit including this header would
// emit its own definition.  NOTE(review): the in-class declaration is not
// visible from here; if it is already declared inline this is redundant but
// harmless.
inline void RegExpNode::Bind(RegExpMacroAssembler* macro) {
  macro->Bind(&label_);
}
|
||||
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
||||
|
||||
#endif // V8_JSREGEXP_INL_H_
|
2169
src/jsregexp.cc
2169
src/jsregexp.cc
File diff suppressed because it is too large
Load Diff
692
src/jsregexp.h
692
src/jsregexp.h
@ -30,6 +30,10 @@
|
||||
|
||||
namespace v8 { namespace internal {
|
||||
|
||||
|
||||
class RegExpMacroAssembler;
|
||||
|
||||
|
||||
class RegExpImpl {
|
||||
public:
|
||||
// Creates a regular expression literal in the old space.
|
||||
@ -61,10 +65,28 @@ class RegExpImpl {
|
||||
static Handle<Object> ExecGlobal(Handle<JSRegExp> regexp,
|
||||
Handle<String> subject);
|
||||
|
||||
// Stores an uncompiled RegExp pattern in the JSRegExp object.
|
||||
// It will be compiled by JSCRE when first executed.
|
||||
static Handle<Object> JscrePrepare(Handle<JSRegExp> re,
|
||||
Handle<String> pattern,
|
||||
JSRegExp::Flags flags);
|
||||
|
||||
// Stores a compiled RegExp pattern in the JSRegExp object.
|
||||
// The pattern is compiled by Irregexp.
|
||||
static Handle<Object> IrregexpPrepare(Handle<JSRegExp> re,
|
||||
Handle<String> pattern,
|
||||
JSRegExp::Flags flags,
|
||||
Handle<FixedArray> irregexp_data);
|
||||
|
||||
|
||||
// Compile the pattern using JSCRE and store the result in the
|
||||
// JSRegExp object.
|
||||
static Handle<Object> JscreCompile(Handle<JSRegExp> re);
|
||||
|
||||
static Handle<Object> AtomCompile(Handle<JSRegExp> re,
|
||||
Handle<String> pattern,
|
||||
JSRegExp::Flags flags);
|
||||
|
||||
JSRegExp::Flags flags,
|
||||
Handle<String> match_pattern);
|
||||
static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
|
||||
Handle<String> subject,
|
||||
Handle<Object> index);
|
||||
@ -72,47 +94,78 @@ class RegExpImpl {
|
||||
static Handle<Object> AtomExecGlobal(Handle<JSRegExp> regexp,
|
||||
Handle<String> subject);
|
||||
|
||||
static Handle<Object> JsreCompile(Handle<JSRegExp> re,
|
||||
Handle<String> pattern,
|
||||
JSRegExp::Flags flags);
|
||||
static Handle<Object> JscreCompile(Handle<JSRegExp> re,
|
||||
Handle<String> pattern,
|
||||
JSRegExp::Flags flags);
|
||||
|
||||
static Handle<Object> JsreExec(Handle<JSRegExp> regexp,
|
||||
Handle<String> subject,
|
||||
Handle<Object> index);
|
||||
// Execute a compiled JSCRE pattern.
|
||||
static Handle<Object> JscreExec(Handle<JSRegExp> regexp,
|
||||
Handle<String> subject,
|
||||
Handle<Object> index);
|
||||
|
||||
static Handle<Object> JsreExecGlobal(Handle<JSRegExp> regexp,
|
||||
Handle<String> subject);
|
||||
// Execute an Irregexp bytecode pattern.
|
||||
static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp,
|
||||
Handle<String> subject,
|
||||
Handle<Object> index);
|
||||
|
||||
static Handle<Object> JscreExecGlobal(Handle<JSRegExp> regexp,
|
||||
Handle<String> subject);
|
||||
|
||||
static Handle<Object> IrregexpExecGlobal(Handle<JSRegExp> regexp,
|
||||
Handle<String> subject);
|
||||
|
||||
static void NewSpaceCollectionPrologue();
|
||||
static void OldSpaceCollectionPrologue();
|
||||
|
||||
private:
|
||||
// Converts a source string to a 16 bit flat string. The string
|
||||
// will be either sequential or it will be a SlicedString backed
|
||||
// by a flat string.
|
||||
static Handle<String> StringToTwoByte(Handle<String> pattern);
|
||||
static Handle<String> CachedStringToTwoByte(Handle<String> pattern);
|
||||
|
||||
static const int kIrregexpImplementationIndex = 0;
|
||||
static const int kIrregexpNumberOfCapturesIndex = 1;
|
||||
static const int kIrregexpNumberOfRegistersIndex = 2;
|
||||
static const int kIrregexpCodeIndex = 3;
|
||||
static const int kIrregexpDataLength = 4;
|
||||
|
||||
static const int kJscreNumberOfCapturesIndex = 0;
|
||||
static const int kJscreInternalIndex = 1;
|
||||
static const int kJscreDataLength = 2;
|
||||
|
||||
private:
|
||||
static String* last_ascii_string_;
|
||||
static String* two_byte_cached_string_;
|
||||
|
||||
// Returns the capture from the re.
|
||||
static int JsreCapture(Handle<JSRegExp> re);
|
||||
static ByteArray* JsreInternal(Handle<JSRegExp> re);
|
||||
static int JscreNumberOfCaptures(Handle<JSRegExp> re);
|
||||
static ByteArray* JscreInternal(Handle<JSRegExp> re);
|
||||
|
||||
static int IrregexpNumberOfCaptures(Handle<JSRegExp> re);
|
||||
static int IrregexpNumberOfRegisters(Handle<JSRegExp> re);
|
||||
static Handle<ByteArray> IrregexpCode(Handle<JSRegExp> re);
|
||||
|
||||
// Call jsRegExpExecute once
|
||||
static Handle<Object> JsreExecOnce(Handle<JSRegExp> regexp,
|
||||
int num_captures,
|
||||
Handle<String> subject,
|
||||
int previous_index,
|
||||
const uc16* utf8_subject,
|
||||
int* ovector,
|
||||
int ovector_length);
|
||||
static Handle<Object> JscreExecOnce(Handle<JSRegExp> regexp,
|
||||
int num_captures,
|
||||
Handle<String> subject,
|
||||
int previous_index,
|
||||
const uc16* utf8_subject,
|
||||
int* ovector,
|
||||
int ovector_length);
|
||||
|
||||
static Handle<Object> IrregexpExecOnce(Handle<JSRegExp> regexp,
|
||||
int num_captures,
|
||||
Handle<String> subject16,
|
||||
int previous_index,
|
||||
int* ovector,
|
||||
int ovector_length);
|
||||
|
||||
// Set the subject cache. The previous string buffer is not deleted, so the
|
||||
// caller should ensure that it doesn't leak.
|
||||
static void SetSubjectCache(String* subject, char* utf8_subject,
|
||||
int uft8_length, int character_position,
|
||||
static void SetSubjectCache(String* subject,
|
||||
char* utf8_subject,
|
||||
int uft8_length,
|
||||
int character_position,
|
||||
int utf8_position);
|
||||
|
||||
// A one element cache of the last utf8_subject string and its length. The
|
||||
@ -125,6 +178,599 @@ class RegExpImpl {
|
||||
};
|
||||
|
||||
|
||||
class CharacterRange {
|
||||
public:
|
||||
CharacterRange() : from_(0), to_(0) { }
|
||||
// For compatibility with the CHECK_OK macro
|
||||
CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT
|
||||
CharacterRange(uc16 from, uc16 to)
|
||||
: from_(from),
|
||||
to_(to) {
|
||||
}
|
||||
static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges);
|
||||
static inline CharacterRange Singleton(uc16 value) {
|
||||
return CharacterRange(value, value);
|
||||
}
|
||||
static inline CharacterRange Range(uc16 from, uc16 to) {
|
||||
ASSERT(from <= to);
|
||||
return CharacterRange(from, to);
|
||||
}
|
||||
static inline CharacterRange Everything() {
|
||||
return CharacterRange(0, 0xFFFF);
|
||||
}
|
||||
bool Contains(uc16 i) { return from_ <= i && i <= to_; }
|
||||
uc16 from() const { return from_; }
|
||||
void set_from(uc16 value) { from_ = value; }
|
||||
uc16 to() const { return to_; }
|
||||
void set_to(uc16 value) { to_ = value; }
|
||||
bool is_valid() { return from_ <= to_; }
|
||||
bool IsSingleton() { return (from_ == to_); }
|
||||
void AddCaseEquivalents(ZoneList<CharacterRange>* ranges);
|
||||
static const int kRangeCanonicalizeMax = 0x200;
|
||||
static const int kStartMarker = (1 << 24);
|
||||
static const int kPayloadMask = (1 << 24) - 1;
|
||||
private:
|
||||
uc16 from_;
|
||||
uc16 to_;
|
||||
};
|
||||
|
||||
|
||||
template <typename Node, class Callback>
|
||||
static void DoForEach(Node* node, Callback* callback);
|
||||
|
||||
|
||||
// A zone splay tree. The config type parameter encapsulates the
|
||||
// different configurations of a concrete splay tree:
|
||||
//
|
||||
// typedef Key: the key type
|
||||
// typedef Value: the value type
|
||||
// static const kNoKey: the dummy key used when no key is set
|
||||
// static const kNoValue: the dummy value used to initialize nodes
|
||||
// int (Compare)(Key& a, Key& b) -> {-1, 0, 1}: comparison function
|
||||
//
|
||||
template <typename Config>
|
||||
class ZoneSplayTree : public ZoneObject {
|
||||
public:
|
||||
typedef typename Config::Key Key;
|
||||
typedef typename Config::Value Value;
|
||||
|
||||
class Locator;
|
||||
|
||||
ZoneSplayTree() : root_(NULL) { }
|
||||
|
||||
// Inserts the given key in this tree with the given value. Returns
|
||||
// true if a node was inserted, otherwise false. If found the locator
|
||||
// is enabled and provides access to the mapping for the key.
|
||||
bool Insert(const Key& key, Locator* locator);
|
||||
|
||||
// Looks up the key in this tree and returns true if it was found,
|
||||
// otherwise false. If the node is found the locator is enabled and
|
||||
// provides access to the mapping for the key.
|
||||
bool Find(const Key& key, Locator* locator);
|
||||
|
||||
// Finds the mapping with the greatest key less than or equal to the
|
||||
// given key.
|
||||
bool FindGreatestLessThan(const Key& key, Locator* locator);
|
||||
|
||||
// Find the mapping with the greatest key in this tree.
|
||||
bool FindGreatest(Locator* locator);
|
||||
|
||||
// Finds the mapping with the least key greater than or equal to the
|
||||
// given key.
|
||||
bool FindLeastGreaterThan(const Key& key, Locator* locator);
|
||||
|
||||
// Find the mapping with the least key in this tree.
|
||||
bool FindLeast(Locator* locator);
|
||||
|
||||
// Remove the node with the given key from the tree.
|
||||
bool Remove(const Key& key);
|
||||
|
||||
bool is_empty() { return root_ == NULL; }
|
||||
|
||||
// Perform the splay operation for the given key. Moves the node with
|
||||
// the given key to the top of the tree. If no node has the given
|
||||
// key, the last node on the search path is moved to the top of the
|
||||
// tree.
|
||||
void Splay(const Key& key);
|
||||
|
||||
class Node : public ZoneObject {
|
||||
public:
|
||||
Node(const Key& key, const Value& value)
|
||||
: key_(key),
|
||||
value_(value),
|
||||
left_(NULL),
|
||||
right_(NULL) { }
|
||||
Key key() { return key_; }
|
||||
Value value() { return value_; }
|
||||
Node* left() { return left_; }
|
||||
Node* right() { return right_; }
|
||||
private:
|
||||
friend class ZoneSplayTree;
|
||||
friend class Locator;
|
||||
Key key_;
|
||||
Value value_;
|
||||
Node* left_;
|
||||
Node* right_;
|
||||
};
|
||||
|
||||
// A locator provides access to a node in the tree without actually
|
||||
// exposing the node.
|
||||
class Locator {
|
||||
public:
|
||||
explicit Locator(Node* node) : node_(node) { }
|
||||
Locator() : node_(NULL) { }
|
||||
const Key& key() { return node_->key_; }
|
||||
Value& value() { return node_->value_; }
|
||||
void set_value(const Value& value) { node_->value_ = value; }
|
||||
inline void bind(Node* node) { node_ = node; }
|
||||
private:
|
||||
Node* node_;
|
||||
};
|
||||
|
||||
template <class Callback>
|
||||
void ForEach(Callback* c) {
|
||||
DoForEach<typename ZoneSplayTree<Config>::Node, Callback>(root_, c);
|
||||
}
|
||||
|
||||
private:
|
||||
Node* root_;
|
||||
};
|
||||
|
||||
|
||||
// A set of unsigned integers that behaves especially well on small
|
||||
// integers (< 32). May do zone-allocation.
|
||||
class OutSet: public ZoneObject {
|
||||
public:
|
||||
OutSet() : first_(0), remaining_(NULL), successors_(NULL) { }
|
||||
OutSet* Extend(unsigned value);
|
||||
bool Get(unsigned value);
|
||||
static const unsigned kFirstLimit = 32;
|
||||
private:
|
||||
|
||||
// Destructively set a value in this set. In most cases you want
|
||||
// to use Extend instead to ensure that only one instance exists
|
||||
// that contains the same values.
|
||||
void Set(unsigned value);
|
||||
|
||||
// The successors are a list of sets that contain the same values
|
||||
// as this set and the one more value that is not present in this
|
||||
// set.
|
||||
ZoneList<OutSet*>* successors() { return successors_; }
|
||||
|
||||
OutSet(uint32_t first, ZoneList<unsigned>* remaining)
|
||||
: first_(first), remaining_(remaining), successors_(NULL) { }
|
||||
uint32_t first_;
|
||||
ZoneList<unsigned>* remaining_;
|
||||
ZoneList<OutSet*>* successors_;
|
||||
};
|
||||
|
||||
|
||||
// A mapping from integers, specified as ranges, to a set of integers.
|
||||
// Used for mapping character ranges to choices.
|
||||
class DispatchTable {
|
||||
public:
|
||||
class Entry {
|
||||
public:
|
||||
Entry()
|
||||
: from_(0), to_(0), out_set_(NULL) { }
|
||||
Entry(uc16 from, uc16 to, OutSet* out_set)
|
||||
: from_(from), to_(to), out_set_(out_set) { }
|
||||
uc16 from() { return from_; }
|
||||
uc16 to() { return to_; }
|
||||
void set_to(uc16 value) { to_ = value; }
|
||||
void AddValue(int value) { out_set_ = out_set_->Extend(value); }
|
||||
OutSet* out_set() { return out_set_; }
|
||||
private:
|
||||
uc16 from_;
|
||||
uc16 to_;
|
||||
OutSet* out_set_;
|
||||
};
|
||||
|
||||
class Config {
|
||||
public:
|
||||
typedef uc16 Key;
|
||||
typedef Entry Value;
|
||||
static const uc16 kNoKey;
|
||||
static const Entry kNoValue;
|
||||
static inline int Compare(uc16 a, uc16 b) {
|
||||
if (a == b)
|
||||
return 0;
|
||||
else if (a < b)
|
||||
return -1;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
|
||||
void AddRange(CharacterRange range, int value);
|
||||
OutSet* Get(uc16 value);
|
||||
void Dump();
|
||||
|
||||
template <typename Callback>
|
||||
void ForEach(Callback* callback) { return tree()->ForEach(callback); }
|
||||
private:
|
||||
// There can't be a static empty set since it allocates its
|
||||
// successors in a zone and caches them.
|
||||
OutSet* empty() { return &empty_; }
|
||||
OutSet empty_;
|
||||
ZoneSplayTree<Config>* tree() { return &tree_; }
|
||||
ZoneSplayTree<Config> tree_;
|
||||
};
|
||||
|
||||
|
||||
#define FOR_EACH_NODE_TYPE(VISIT) \
|
||||
VISIT(End) \
|
||||
VISIT(Action) \
|
||||
VISIT(Choice) \
|
||||
VISIT(BackReference) \
|
||||
VISIT(Text)
|
||||
|
||||
|
||||
#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
|
||||
VISIT(Disjunction) \
|
||||
VISIT(Alternative) \
|
||||
VISIT(Assertion) \
|
||||
VISIT(CharacterClass) \
|
||||
VISIT(Atom) \
|
||||
VISIT(Quantifier) \
|
||||
VISIT(Capture) \
|
||||
VISIT(Lookahead) \
|
||||
VISIT(BackReference) \
|
||||
VISIT(Empty) \
|
||||
VISIT(Text)
|
||||
|
||||
|
||||
#define FORWARD_DECLARE(Name) class RegExp##Name;
|
||||
FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
|
||||
#undef FORWARD_DECLARE
|
||||
|
||||
|
||||
class TextElement {
|
||||
public:
|
||||
enum Type {UNINITIALIZED, ATOM, CHAR_CLASS};
|
||||
TextElement() : type(UNINITIALIZED) { }
|
||||
explicit TextElement(Type t) : type(t) { }
|
||||
static TextElement Atom(RegExpAtom* atom);
|
||||
static TextElement CharClass(RegExpCharacterClass* char_class);
|
||||
Type type;
|
||||
union {
|
||||
RegExpAtom* u_atom;
|
||||
RegExpCharacterClass* u_char_class;
|
||||
} data;
|
||||
};
|
||||
|
||||
|
||||
// Per-node analysis flags, stored as one-bit bitfields so the whole struct
// stays small (a STATIC_CHECK after this struct requires
// sizeof(NodeInfo) <= sizeof(int)).
//
// The three follows_*_interest flags are collectively the node's
// "interests": SameInterests compares exactly those three and
// AdoptInterests copies exactly those three. The remaining flags are left
// alone by both.
struct NodeInfo {
  // All flags start out false.
  NodeInfo()
      : being_analyzed(false),
        been_analyzed(false),
        determine_word(false),
        determine_newline(false),
        determine_start(false),
        follows_word_interest(false),
        follows_newline_interest(false),
        follows_start_interest(false) { }

  // True when this and 'that' agree on all three follows_*_interest flags.
  // Does not modify either object.
  bool SameInterests(NodeInfo* that) const {
    return (follows_word_interest == that->follows_word_interest)
        && (follows_newline_interest == that->follows_newline_interest)
        && (follows_start_interest == that->follows_start_interest);
  }

  // Overwrites this node's three follows_*_interest flags with those of
  // 'that'.
  void AdoptInterests(NodeInfo* that) {
    follows_word_interest = that->follows_word_interest;
    follows_newline_interest = that->follows_newline_interest;
    follows_start_interest = that->follows_start_interest;
  }

  // Each prev_determine_* query is the OR of the node's own determine_* flag
  // and the matching follows_*_interest flag.
  bool prev_determine_word() const {
    return determine_word || follows_word_interest;
  }
  bool prev_determine_newline() const {
    return determine_newline || follows_newline_interest;
  }
  bool prev_determine_start() const {
    return determine_start || follows_start_interest;
  }

  // Analysis progress markers.
  bool being_analyzed: 1;
  bool been_analyzed: 1;
  // Flags describing this node itself.
  bool determine_word: 1;
  bool determine_newline: 1;
  bool determine_start: 1;
  // The node's "interests" (see SameInterests / AdoptInterests).
  bool follows_word_interest: 1;
  bool follows_newline_interest: 1;
  bool follows_start_interest: 1;
};
|
||||
|
||||
|
||||
STATIC_CHECK(sizeof(NodeInfo) <= sizeof(int)); // NOLINT
|
||||
|
||||
|
||||
class SiblingList {
|
||||
public:
|
||||
SiblingList() : list_(NULL) { }
|
||||
int length() {
|
||||
return list_ == NULL ? 0 : list_->length();
|
||||
}
|
||||
void Ensure(RegExpNode* parent) {
|
||||
if (list_ == NULL) {
|
||||
list_ = new ZoneList<RegExpNode*>(2);
|
||||
list_->Add(parent);
|
||||
}
|
||||
}
|
||||
void Add(RegExpNode* node) { list_->Add(node); }
|
||||
RegExpNode* Get(int index) { return list_->at(index); }
|
||||
private:
|
||||
ZoneList<RegExpNode*>* list_;
|
||||
};
|
||||
|
||||
|
||||
class RegExpNode: public ZoneObject {
|
||||
public:
|
||||
virtual ~RegExpNode() { }
|
||||
virtual void Accept(NodeVisitor* visitor) = 0;
|
||||
// Generates a goto to this node or actually generates the code at this point.
|
||||
// Until the implementation is complete we will return true for success and
|
||||
// false for failure.
|
||||
virtual bool GoTo(RegExpCompiler* compiler);
|
||||
Label* label();
|
||||
|
||||
// Until the implementation is complete we will return true for success and
|
||||
// false for failure.
|
||||
virtual bool Emit(RegExpCompiler* compiler) = 0;
|
||||
virtual RegExpNode* PropagateInterest(NodeInfo* info) = 0;
|
||||
NodeInfo* info() { return &info_; }
|
||||
virtual bool IsBacktrack() { return false; }
|
||||
RegExpNode* GetSibling(NodeInfo* info);
|
||||
void EnsureSiblings() { siblings_.Ensure(this); }
|
||||
void AddSibling(RegExpNode* node) { siblings_.Add(node); }
|
||||
protected:
|
||||
inline void Bind(RegExpMacroAssembler* macro);
|
||||
private:
|
||||
Label label_;
|
||||
NodeInfo info_;
|
||||
SiblingList siblings_;
|
||||
};
|
||||
|
||||
|
||||
class SeqRegExpNode: public RegExpNode {
|
||||
public:
|
||||
explicit SeqRegExpNode(RegExpNode* on_success)
|
||||
: on_success_(on_success) { }
|
||||
RegExpNode* on_success() { return on_success_; }
|
||||
void set_on_success(RegExpNode* node) { on_success_ = node; }
|
||||
virtual bool Emit(RegExpCompiler* compiler) { return false; }
|
||||
private:
|
||||
RegExpNode* on_success_;
|
||||
};
|
||||
|
||||
|
||||
class ActionNode: public SeqRegExpNode {
|
||||
public:
|
||||
enum Type {
|
||||
STORE_REGISTER,
|
||||
INCREMENT_REGISTER,
|
||||
STORE_POSITION,
|
||||
SAVE_POSITION,
|
||||
RESTORE_POSITION,
|
||||
BEGIN_SUBMATCH,
|
||||
ESCAPE_SUBMATCH
|
||||
};
|
||||
static ActionNode* StoreRegister(int reg, int val, RegExpNode* on_success);
|
||||
static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
|
||||
static ActionNode* StorePosition(int reg, RegExpNode* on_success);
|
||||
static ActionNode* SavePosition(int reg, RegExpNode* on_success);
|
||||
static ActionNode* RestorePosition(int reg, RegExpNode* on_success);
|
||||
static ActionNode* BeginSubmatch(int reg, RegExpNode* on_success);
|
||||
static ActionNode* EscapeSubmatch(int reg, RegExpNode* on_success);
|
||||
virtual void Accept(NodeVisitor* visitor);
|
||||
virtual bool Emit(RegExpCompiler* compiler);
|
||||
virtual RegExpNode* PropagateInterest(NodeInfo* info);
|
||||
private:
|
||||
union {
|
||||
struct {
|
||||
int reg;
|
||||
int value;
|
||||
} u_store_register;
|
||||
struct {
|
||||
int reg;
|
||||
} u_increment_register;
|
||||
struct {
|
||||
int reg;
|
||||
} u_position_register;
|
||||
struct {
|
||||
int reg;
|
||||
} u_submatch_stack_pointer_register;
|
||||
} data_;
|
||||
ActionNode(Type type, RegExpNode* on_success)
|
||||
: SeqRegExpNode(on_success),
|
||||
type_(type) { }
|
||||
Type type_;
|
||||
friend class DotPrinter;
|
||||
};
|
||||
|
||||
|
||||
class TextNode: public SeqRegExpNode {
|
||||
public:
|
||||
TextNode(ZoneList<TextElement>* elms,
|
||||
RegExpNode* on_success,
|
||||
RegExpNode* on_failure)
|
||||
: SeqRegExpNode(on_success),
|
||||
on_failure_(on_failure),
|
||||
elms_(elms) { }
|
||||
virtual void Accept(NodeVisitor* visitor);
|
||||
virtual RegExpNode* PropagateInterest(NodeInfo* info);
|
||||
RegExpNode* on_failure() { return on_failure_; }
|
||||
virtual bool Emit(RegExpCompiler* compiler);
|
||||
ZoneList<TextElement>* elements() { return elms_; }
|
||||
private:
|
||||
RegExpNode* on_failure_;
|
||||
ZoneList<TextElement>* elms_;
|
||||
};
|
||||
|
||||
|
||||
class BackReferenceNode: public SeqRegExpNode {
|
||||
public:
|
||||
BackReferenceNode(int start_reg,
|
||||
int end_reg,
|
||||
RegExpNode* on_success,
|
||||
RegExpNode* on_failure)
|
||||
: SeqRegExpNode(on_success),
|
||||
on_failure_(on_failure),
|
||||
start_reg_(start_reg),
|
||||
end_reg_(end_reg) { }
|
||||
virtual void Accept(NodeVisitor* visitor);
|
||||
RegExpNode* on_failure() { return on_failure_; }
|
||||
int start_register() { return start_reg_; }
|
||||
int end_register() { return end_reg_; }
|
||||
virtual bool Emit(RegExpCompiler* compiler);
|
||||
virtual RegExpNode* PropagateInterest(NodeInfo* info);
|
||||
private:
|
||||
RegExpNode* on_failure_;
|
||||
int start_reg_;
|
||||
int end_reg_;
|
||||
};
|
||||
|
||||
|
||||
class EndNode: public RegExpNode {
|
||||
public:
|
||||
enum Action { ACCEPT, BACKTRACK };
|
||||
explicit EndNode(Action action) : action_(action) { }
|
||||
virtual void Accept(NodeVisitor* visitor);
|
||||
virtual bool Emit(RegExpCompiler* compiler);
|
||||
virtual RegExpNode* PropagateInterest(NodeInfo* info);
|
||||
virtual bool IsBacktrack() { return action_ == BACKTRACK; }
|
||||
virtual bool GoTo(RegExpCompiler* compiler);
|
||||
private:
|
||||
Action action_;
|
||||
};
|
||||
|
||||
|
||||
class Guard: public ZoneObject {
|
||||
public:
|
||||
enum Relation { LT, GEQ };
|
||||
Guard(int reg, Relation op, int value)
|
||||
: reg_(reg),
|
||||
op_(op),
|
||||
value_(value) { }
|
||||
int reg() { return reg_; }
|
||||
Relation op() { return op_; }
|
||||
int value() { return value_; }
|
||||
private:
|
||||
int reg_;
|
||||
Relation op_;
|
||||
int value_;
|
||||
};
|
||||
|
||||
|
||||
class GuardedAlternative {
|
||||
public:
|
||||
explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { }
|
||||
void AddGuard(Guard* guard);
|
||||
RegExpNode* node() { return node_; }
|
||||
void set_node(RegExpNode* node) { node_ = node; }
|
||||
ZoneList<Guard*>* guards() { return guards_; }
|
||||
private:
|
||||
RegExpNode* node_;
|
||||
ZoneList<Guard*>* guards_;
|
||||
};
|
||||
|
||||
|
||||
class ChoiceNode: public RegExpNode {
|
||||
public:
|
||||
explicit ChoiceNode(int expected_size, RegExpNode* on_failure)
|
||||
: on_failure_(on_failure),
|
||||
alternatives_(new ZoneList<GuardedAlternative>(expected_size)),
|
||||
table_calculated_(false),
|
||||
being_calculated_(false) { }
|
||||
virtual void Accept(NodeVisitor* visitor);
|
||||
void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); }
|
||||
ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
|
||||
DispatchTable* table() { return &table_; }
|
||||
RegExpNode* on_failure() { return on_failure_; }
|
||||
virtual bool Emit(RegExpCompiler* compiler);
|
||||
virtual RegExpNode* PropagateInterest(NodeInfo* info);
|
||||
bool table_calculated() { return table_calculated_; }
|
||||
void set_table_calculated(bool b) { table_calculated_ = b; }
|
||||
bool being_calculated() { return being_calculated_; }
|
||||
void set_being_calculated(bool b) { being_calculated_ = b; }
|
||||
private:
|
||||
void GenerateGuard(RegExpMacroAssembler* macro_assembler,
|
||||
Guard *guard,
|
||||
Label* on_failure);
|
||||
RegExpNode* on_failure_;
|
||||
ZoneList<GuardedAlternative>* alternatives_;
|
||||
DispatchTable table_;
|
||||
bool table_calculated_;
|
||||
bool being_calculated_;
|
||||
};
|
||||
|
||||
|
||||
class NodeVisitor {
|
||||
public:
|
||||
virtual ~NodeVisitor() { }
|
||||
#define DECLARE_VISIT(Type) \
|
||||
virtual void Visit##Type(Type##Node* that) = 0;
|
||||
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
|
||||
#undef DECLARE_VISIT
|
||||
};
|
||||
|
||||
|
||||
// Node visitor used to add the start set of the alternatives to the
|
||||
// dispatch table of a choice node.
|
||||
class DispatchTableConstructor: public NodeVisitor {
|
||||
public:
|
||||
explicit DispatchTableConstructor(DispatchTable* table)
|
||||
: table_(table),
|
||||
choice_index_(-1) { }
|
||||
|
||||
void BuildTable(ChoiceNode* node);
|
||||
|
||||
void AddRange(CharacterRange range) {
|
||||
table()->AddRange(range, choice_index_);
|
||||
}
|
||||
|
||||
void AddInverse(ZoneList<CharacterRange>* ranges);
|
||||
|
||||
#define DECLARE_VISIT(Type) \
|
||||
virtual void Visit##Type(Type##Node* that);
|
||||
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
|
||||
#undef DECLARE_VISIT
|
||||
|
||||
DispatchTable* table() { return table_; }
|
||||
void set_choice_index(int value) { choice_index_ = value; }
|
||||
|
||||
protected:
|
||||
DispatchTable *table_;
|
||||
int choice_index_;
|
||||
};
|
||||
|
||||
|
||||
class Analysis: public NodeVisitor {
|
||||
public:
|
||||
void EnsureAnalyzed(RegExpNode* node);
|
||||
|
||||
#define DECLARE_VISIT(Type) \
|
||||
virtual void Visit##Type(Type##Node* that);
|
||||
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
|
||||
#undef DECLARE_VISIT
|
||||
};
|
||||
|
||||
|
||||
// Plain result record for regexp parsing. It is the 'result' out-parameter
// type of ParseRegExp() and the 'input' parameter type of
// RegExpEngine::Compile(). There is no constructor, so every field is
// uninitialized until it is assigned.
struct RegExpParseResult {
|
||||
// Parsed regexp tree. NOTE(review): presumably only meaningful when parsing
// succeeded -- confirm against ParseRegExp.
RegExpTree* tree;
|
||||
// NOTE(review): name suggests "the pattern contained character escapes";
// the code that sets it is not visible here.
bool has_character_escapes;
|
||||
// NOTE(review): presumably the error message when parsing fails -- confirm.
Handle<String> error;
|
||||
// NOTE(review): presumably the number of capture groups in the pattern.
int capture_count;
|
||||
};
|
||||
|
||||
|
||||
class RegExpEngine: public AllStatic {
|
||||
public:
|
||||
static Handle<FixedArray> Compile(RegExpParseResult* input,
|
||||
RegExpNode** node_return,
|
||||
bool ignore_case);
|
||||
static void DotPrint(const char* label, RegExpNode* node);
|
||||
};
|
||||
|
||||
|
||||
} } // namespace v8::internal
|
||||
|
||||
#endif // V8_JSREGEXP_H_
|
||||
|
@ -89,12 +89,19 @@ void List<T, P>::Iterate(void (*callback)(T* x)) {
|
||||
}
|
||||
|
||||
|
||||
template<typename T, class P>
|
||||
bool List<T, P>::Contains(const T& elm) {
|
||||
for (int i = 0; i < length_; i++) {
|
||||
if (data_[i] == elm)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
template<typename T, class P>
|
||||
void List<T, P>::Sort(int (*cmp)(const T* x, const T* y)) {
|
||||
qsort(data_,
|
||||
length_,
|
||||
sizeof(T),
|
||||
reinterpret_cast<int (*)(const void*, const void*)>(cmp));
|
||||
ToVector().Sort(cmp);
|
||||
#ifdef DEBUG
|
||||
for (int i = 1; i < length_; i++)
|
||||
ASSERT(cmp(&data_[i - 1], &data_[i]) <= 0);
|
||||
@ -102,6 +109,12 @@ void List<T, P>::Sort(int (*cmp)(const T* x, const T* y)) {
|
||||
}
|
||||
|
||||
|
||||
// Convenience overload: sorts the list by forwarding to Sort(cmp) with
// PointerSpaceship<T> as the comparison function (defined elsewhere).
template<typename T, class P>
|
||||
void List<T, P>::Sort() {
|
||||
Sort(PointerSpaceship<T>);
|
||||
}
|
||||
|
||||
|
||||
template<typename T, class P>
|
||||
void List<T, P>::Initialize(int capacity) {
|
||||
ASSERT(capacity >= 0);
|
||||
|
@ -46,6 +46,7 @@ namespace v8 { namespace internal {
|
||||
template <typename T, class P>
|
||||
class List {
|
||||
public:
|
||||
|
||||
INLINE(explicit List(int capacity)) { Initialize(capacity); }
|
||||
INLINE(~List()) { DeleteData(data_); }
|
||||
|
||||
@ -67,6 +68,8 @@ class List {
|
||||
|
||||
Vector<T> ToVector() { return Vector<T>(data_, length_); }
|
||||
|
||||
Vector<const T> ToConstVector() { return Vector<const T>(data_, length_); }
|
||||
|
||||
// Adds a copy of the given 'element' to the end of the list,
|
||||
// expanding the list if necessary.
|
||||
T& Add(const T& element);
|
||||
@ -92,11 +95,14 @@ class List {
|
||||
// Drops all but the first 'pos' elements from the list.
|
||||
INLINE(void Rewind(int pos));
|
||||
|
||||
bool Contains(const T& elm);
|
||||
|
||||
// Iterate through all list entries, starting at index 0.
|
||||
void Iterate(void (*callback)(T* x));
|
||||
|
||||
// Sort all list entries (using QuickSort)
|
||||
void Sort(int (*cmp)(const T* x, const T* y));
|
||||
void Sort();
|
||||
|
||||
INLINE(void Initialize(int capacity));
|
||||
|
||||
|
@ -670,7 +670,14 @@ void JSRegExp::JSRegExpVerify() {
|
||||
}
|
||||
case JSRegExp::JSCRE: {
|
||||
FixedArray* arr = FixedArray::cast(data());
|
||||
ASSERT(arr->get(JSRegExp::kJscreDataIndex)->IsFixedArray());
|
||||
Object* jscre_data = arr->get(JSRegExp::kJscreDataIndex);
|
||||
ASSERT(jscre_data->IsFixedArray() || jscre_data->IsUndefined());
|
||||
break;
|
||||
}
|
||||
case JSRegExp::IRREGEXP: {
|
||||
FixedArray* arr = FixedArray::cast(data());
|
||||
Object* jscre_data = arr->get(JSRegExp::kJscreDataIndex);
|
||||
ASSERT(jscre_data->IsFixedArray());
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -279,6 +279,16 @@ bool StringShape::IsExternalTwoByte() {
|
||||
}
|
||||
|
||||
|
||||
// Returns the character at 'index' as a uc32. The backing storage is read as
// bytes when is_ascii_ is set and as uc16 units otherwise.
uc32 FlatStringReader::Get(int index) {
  // Valid indices are [0, length_). The previous check also accepted
  // index == length_, which reads one element past the characters that
  // length_ describes.
  ASSERT(0 <= index && index < length_);
  if (is_ascii_) {
    return static_cast<const byte*>(start_)[index];
  } else {
    return static_cast<const uc16*>(start_)[index];
  }
}
|
||||
|
||||
|
||||
bool Object::IsNumber() {
|
||||
return IsSmi() || IsHeapNumber();
|
||||
}
|
||||
@ -1142,6 +1152,13 @@ Object* FixedArray::get(int index) {
|
||||
}
|
||||
|
||||
|
||||
void FixedArray::set(int index, Smi* value) {
|
||||
ASSERT(reinterpret_cast<Object*>(value)->IsSmi());
|
||||
int offset = kHeaderSize + index * kPointerSize;
|
||||
WRITE_FIELD(this, offset, value);
|
||||
}
|
||||
|
||||
|
||||
void FixedArray::set(int index, Object* value) {
|
||||
ASSERT(index >= 0 && index < this->length());
|
||||
int offset = kHeaderSize + index * kPointerSize;
|
||||
@ -1747,6 +1764,7 @@ Code::Flags Code::flags() {
|
||||
|
||||
|
||||
void Code::set_flags(Code::Flags flags) {
|
||||
STATIC_ASSERT(Code::NUMBER_OF_KINDS <= (kFlagsKindMask >> kFlagsKindShift)+1);
|
||||
// Make sure that all call stubs have an arguments count.
|
||||
ASSERT(ExtractKindFromFlags(flags) != CALL_IC ||
|
||||
ExtractArgumentsCountFromFlags(flags) >= 0);
|
||||
@ -2213,6 +2231,22 @@ JSRegExp::Type JSRegExp::TypeTag() {
|
||||
}
|
||||
|
||||
|
||||
// Reads the Smi stored at kFlagsIndex of the regexp's data array and wraps
// its integer value in a Flags object. The data field must already hold a
// FixedArray.
JSRegExp::Flags JSRegExp::GetFlags() {
  ASSERT(this->data()->IsFixedArray());
  FixedArray* regexp_data = FixedArray::cast(this->data());
  Object* raw_flags = regexp_data->get(kFlagsIndex);
  return Flags(Smi::cast(raw_flags)->value());
}
|
||||
|
||||
|
||||
// Returns the String stored at kSourceIndex of the regexp's data array.
// The data field must already hold a FixedArray.
String* JSRegExp::Pattern() {
  ASSERT(this->data()->IsFixedArray());
  FixedArray* regexp_data = FixedArray::cast(this->data());
  return String::cast(regexp_data->get(kSourceIndex));
}
|
||||
|
||||
|
||||
Object* JSRegExp::DataAt(int index) {
|
||||
ASSERT(TypeTag() != NOT_COMPILED);
|
||||
return FixedArray::cast(data())->get(index);
|
||||
|
@ -3501,6 +3501,57 @@ const unibrow::byte* String::ReadBlock(String* input,
|
||||
}
|
||||
|
||||
|
||||
FlatStringReader* FlatStringReader::top_ = NULL;
|
||||
|
||||
|
||||
FlatStringReader::FlatStringReader(Handle<String> str)
|
||||
: str_(str.location()),
|
||||
length_(str->length()),
|
||||
prev_(top_) {
|
||||
top_ = this;
|
||||
RefreshState();
|
||||
}
|
||||
|
||||
|
||||
FlatStringReader::FlatStringReader(Vector<const char> input)
|
||||
: str_(NULL),
|
||||
is_ascii_(true),
|
||||
length_(input.length()),
|
||||
start_(input.start()),
|
||||
prev_(top_) {
|
||||
top_ = this;
|
||||
}
|
||||
|
||||
|
||||
FlatStringReader::~FlatStringReader() {
|
||||
ASSERT_EQ(top_, this);
|
||||
top_ = prev_;
|
||||
}
|
||||
|
||||
|
||||
void FlatStringReader::RefreshState() {
|
||||
if (str_ == NULL) return;
|
||||
Handle<String> str(str_);
|
||||
StringShape shape(*str);
|
||||
ASSERT(str->IsFlat(shape));
|
||||
is_ascii_ = shape.IsAsciiRepresentation();
|
||||
if (is_ascii_) {
|
||||
start_ = str->ToAsciiVector().start();
|
||||
} else {
|
||||
start_ = str->ToUC16Vector().start();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void FlatStringReader::PostGarbageCollectionProcessing() {
|
||||
FlatStringReader* current = top_;
|
||||
while (current != NULL) {
|
||||
current->RefreshState();
|
||||
current = current->prev_;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void StringInputBuffer::Seek(unsigned pos) {
|
||||
Reset(pos, input_);
|
||||
}
|
||||
|
@ -1498,9 +1498,12 @@ class FixedArray: public Array {
|
||||
|
||||
// Setter and getter for elements.
|
||||
inline Object* get(int index);
|
||||
// Setter that uses write barrier.
|
||||
inline void set(int index, Object* value);
|
||||
|
||||
// Setter with barrier mode.
|
||||
// Setter that doesn't need a write barrier.
|
||||
inline void set(int index, Smi* value);
|
||||
// Setter with explicit barrier mode.
|
||||
inline void set(int index, Object* value, WriteBarrierMode mode);
|
||||
|
||||
// Setters for frequently used oddballs located in old space.
|
||||
@ -2114,14 +2117,17 @@ class Code: public HeapObject {
|
||||
CALL_IC,
|
||||
STORE_IC,
|
||||
KEYED_STORE_IC,
|
||||
// No more than eight kinds. The value currently encoded in three bits in
|
||||
// Flags.
|
||||
|
||||
// Pseudo-kinds.
|
||||
REGEXP = BUILTIN,
|
||||
FIRST_IC_KIND = LOAD_IC,
|
||||
LAST_IC_KIND = KEYED_STORE_IC
|
||||
};
|
||||
|
||||
enum {
|
||||
NUMBER_OF_KINDS = LAST_IC_KIND + 1
|
||||
NUMBER_OF_KINDS = KEYED_STORE_IC + 1
|
||||
};
|
||||
|
||||
// A state indicates that inline cache in this Code object contains
|
||||
@ -2272,7 +2278,6 @@ class Code: public HeapObject {
|
||||
static const int kFlagsTypeMask = 0x000001C0; // 111000000
|
||||
static const int kFlagsArgumentsCountMask = 0xFFFFFE00;
|
||||
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Code);
|
||||
};
|
||||
@ -2912,7 +2917,13 @@ class JSValue: public JSObject {
|
||||
// Regular expressions
|
||||
class JSRegExp: public JSObject {
|
||||
public:
|
||||
enum Type { NOT_COMPILED, JSCRE, ATOM };
|
||||
// Meaning of Type:
|
||||
// NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet.
|
||||
// JSCRE: A complex RegExp for JSCRE
|
||||
// ATOM: A simple string to match against using an indexOf operation.
|
||||
// IRREGEXP: Compiled with Irregexp.
|
||||
// IRREGEXP_NATIVE: Compiled to native code with Irregexp.
|
||||
enum Type { NOT_COMPILED, JSCRE, ATOM, IRREGEXP, IRREGEXP_NATIVE };
|
||||
enum Flag { NONE = 0, GLOBAL = 1, IGNORE_CASE = 2, MULTILINE = 4 };
|
||||
|
||||
class Flags {
|
||||
@ -2929,6 +2940,8 @@ class JSRegExp: public JSObject {
|
||||
DECL_ACCESSORS(data, Object)
|
||||
|
||||
inline Type TypeTag();
|
||||
inline Flags GetFlags();
|
||||
inline String* Pattern();
|
||||
inline Object* DataAt(int index);
|
||||
|
||||
static inline JSRegExp* cast(Object* obj);
|
||||
@ -2945,10 +2958,11 @@ class JSRegExp: public JSObject {
|
||||
static const int kTagIndex = 0;
|
||||
static const int kSourceIndex = kTagIndex + 1;
|
||||
static const int kFlagsIndex = kSourceIndex + 1;
|
||||
// These two are the same since the same entry is shared for
|
||||
// These three are the same since the same entry is shared for
|
||||
// different purposes in different types of regexps.
|
||||
static const int kAtomPatternIndex = kFlagsIndex + 1;
|
||||
static const int kJscreDataIndex = kFlagsIndex + 1;
|
||||
static const int kIrregexpDataIndex = kFlagsIndex + 1;
|
||||
static const int kDataSize = kAtomPatternIndex + 1;
|
||||
};
|
||||
|
||||
@ -3578,6 +3592,28 @@ class ExternalTwoByteString: public ExternalString {
|
||||
};
|
||||
|
||||
|
||||
// A flat string reader provides random access to the contents of a
|
||||
// string independent of the character width of the string. The handle
|
||||
// must be valid as long as the reader is being used.
|
||||
class FlatStringReader BASE_EMBEDDED {
|
||||
public:
|
||||
explicit FlatStringReader(Handle<String> str);
|
||||
explicit FlatStringReader(Vector<const char> input);
|
||||
~FlatStringReader();
|
||||
void RefreshState();
|
||||
inline uc32 Get(int index);
|
||||
int length() { return length_; }
|
||||
static void PostGarbageCollectionProcessing();
|
||||
private:
|
||||
String** str_;
|
||||
bool is_ascii_;
|
||||
int length_;
|
||||
const void* start_;
|
||||
FlatStringReader* prev_;
|
||||
static FlatStringReader* top_;
|
||||
};
|
||||
|
||||
|
||||
// Note that StringInputBuffers are not valid across a GC! To fix this
|
||||
// it would have to store a String Handle instead of a String* and
|
||||
// AsciiStringReadBlock would have to be modified to use memcpy.
|
||||
|
1100
src/parser.cc
1100
src/parser.cc
File diff suppressed because it is too large
Load Diff
@ -145,6 +145,9 @@ ScriptDataImpl* PreParse(unibrow::CharacterStream* stream,
|
||||
v8::Extension* extension);
|
||||
|
||||
|
||||
bool ParseRegExp(FlatStringReader* input, RegExpParseResult* result);
|
||||
|
||||
|
||||
// Support for doing lazy compilation. The script is the script containing full
|
||||
// source of the script where the function is declared. The start_position and
|
||||
// end_position specifies the part of the script source which has the source
|
||||
|
605
src/regexp-macro-assembler-ia32.cc
Normal file
605
src/regexp-macro-assembler-ia32.cc
Normal file
@ -0,0 +1,605 @@
|
||||
// Copyright 2008 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <string.h>
|
||||
#include "v8.h"
|
||||
#include "log.h"
|
||||
#include "ast.h"
|
||||
#include "macro-assembler.h"
|
||||
#include "regexp-macro-assembler-ia32.h"
|
||||
|
||||
namespace v8 { namespace internal {
|
||||
/*
|
||||
* This assembler uses the following register assignment convention
|
||||
* - edx : current character, or kEndOfInput if current position is not
|
||||
* inside string. The kEndOfInput value is greater than 0xffff,
|
||||
* so any tests that don't check whether the current position
|
||||
* is inside the correct range should retain bits above the
|
||||
* 15th in their computations, and fail if the value is too
|
||||
* great.
|
||||
* - edi : current position in input, as negative offset from end of string.
|
||||
* - esi : end of input (points to byte after last character in input).
|
||||
* - ebp : points to the location above the registers on the stack,
|
||||
* as if by the "enter <register_count>" opcode.
|
||||
* - esp : points to tip of backtracking stack.
|
||||
*
|
||||
* The registers eax, ebx and ecx are free to use for computations.
|
||||
*
|
||||
* Each call to a public method should retain this convention.
|
||||
* The stack will have the following structure:
|
||||
* - int* capture_array (int[num_saved_registers_], for output).
|
||||
* - end of input (index of end of string, relative to *string_base)
|
||||
* - start of input (index of first character in string, relative
|
||||
* to *string_base)
|
||||
* - void** string_base (location of a handle containing the string)
|
||||
* - return address
|
||||
* - backup of esi
|
||||
* - backup of edi
|
||||
* ebp-> - old ebp
|
||||
* - register 0 ebp[-4]
|
||||
* - register 1 ebp[-8]
|
||||
* - ...
|
||||
*
|
||||
* The data before ebp must be placed there by the calling code, e.g.,
|
||||
* by calling the code as cast to:
|
||||
* bool (*match)(String** string_base,
|
||||
* int start_offset,
|
||||
* int end_offset,
|
||||
* int* capture_output_array)
|
||||
*/
|
||||
|
||||
#define __ masm_->
|
||||
|
||||
RegExpMacroAssemblerIA32::RegExpMacroAssemblerIA32(
|
||||
Mode mode,
|
||||
int registers_to_save,
|
||||
bool ignore_case)
|
||||
: masm_(new MacroAssembler(NULL, kRegExpCodeSize)),
|
||||
constants_(kRegExpConstantsSize),
|
||||
mode_(mode),
|
||||
num_registers_(registers_to_save),
|
||||
num_saved_registers_(registers_to_save),
|
||||
ignore_case_(ignore_case),
|
||||
entry_label_(),
|
||||
start_label_(),
|
||||
success_label_(),
|
||||
exit_label_(),
|
||||
self_(Heap::undefined_value()) {
|
||||
__ jmp(&entry_label_); // We'll write the entry code later.
|
||||
__ bind(&start_label_); // And then continue from here.
|
||||
}
|
||||
|
||||
|
||||
RegExpMacroAssemblerIA32::~RegExpMacroAssemblerIA32() {
|
||||
delete masm_;
|
||||
// Unuse labels in case we throw away the assembler without calling GetCode.
|
||||
entry_label_.Unuse();
|
||||
start_label_.Unuse();
|
||||
success_label_.Unuse();
|
||||
exit_label_.Unuse();
|
||||
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIA32::AdvanceCurrentPosition(int by) {
|
||||
ASSERT(by > 0);
|
||||
Label inside_string;
|
||||
__ add(Operand(edi), Immediate(by * char_size()));
|
||||
__ j(below, &inside_string);
|
||||
Backtrack();
|
||||
|
||||
__ bind(&inside_string);
|
||||
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIA32::AdvanceRegister(int reg, int by) {
|
||||
ASSERT(reg >= 0);
|
||||
ASSERT(reg < num_registers_);
|
||||
__ add(register_location(reg), Immediate(by));
|
||||
}
|
||||
|
||||
|
||||
// Emits the backtrack sequence: pop the top of the stack into ecx, add the
// immediate self_ to it, and jump to the resulting address.
// NOTE(review): self_ is initialized to Heap::undefined_value() in the
// constructor; presumably the popped value is an offset and self_ is later
// replaced by the code object's address -- confirm where that happens.
void RegExpMacroAssemblerIA32::Backtrack() {
|
||||
// ecx is one of the scratch registers (eax, ebx, ecx) per the register
// convention described at the top of this file.
__ pop(ecx);
|
||||
__ add(Operand(ecx), Immediate(self_));
|
||||
__ jmp(Operand(ecx));
|
||||
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIA32::Bind(Label* label) {
|
||||
__ bind(label);
|
||||
}
|
||||
|
||||
void RegExpMacroAssemblerIA32::CheckBitmap(uc16 start,
|
||||
Label* bitmap,
|
||||
Label* on_zero) {
|
||||
ReadCurrentChar(eax);
|
||||
__ sub(Operand(eax), Immediate(start));
|
||||
__ cmp(eax, 64); // FIXME: 64 = length_of_bitmap_in_bits.
|
||||
BranchOrBacktrack(greater_equal, on_zero);
|
||||
__ mov(ebx, eax);
|
||||
__ shr(ebx, 3);
|
||||
// TODO(lrn): Where is the bitmap stored? Pass the bitmap as argument instead.
|
||||
// __ mov(ecx, position_of_bitmap);
|
||||
__ movzx_b(ebx, Operand(ecx, ebx, times_1, 0));
|
||||
__ and_(eax, (1<<3)-1);
|
||||
__ bt(Operand(ebx), eax);
|
||||
__ j(carry, on_zero);
|
||||
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIA32::CheckCharacter(uc16 c, Label* on_equal) {
|
||||
__ cmp(edx, c);
|
||||
BranchOrBacktrack(equal, on_equal);
|
||||
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIA32::CheckCharacterGT(uc16 limit, Label* on_greater) {
|
||||
__ cmp(edx, limit);
|
||||
BranchOrBacktrack(greater, on_greater);
|
||||
}
|
||||
|
||||
|
||||
// Emits code that goes to |on_less| (or backtracks if it is NULL) when the
// current character, held in edx, is less than |limit|.
void RegExpMacroAssemblerIA32::CheckCharacterLT(uc16 limit, Label* on_less) {
  __ cmp(edx, limit);
  BranchOrBacktrack(less, on_less);
}
|
||||
|
||||
|
||||
// Emits code that compares the input, starting |cp_offset| characters after
// the current position, against the literal |str|. Goes to |on_failure| (or
// backtracks if it is NULL) when the literal does not fit in the remaining
// input or when any character differs.
//
// The literal is copied into a constant buffer at code-generation time and
// compared with a repeated string-compare instruction at run time.
void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
                                               int cp_offset,
                                               Label* on_failure) {
  int byte_length = str.length() * char_size();
  int start_offset = cp_offset * char_size();
  // edi is a negative byte offset from the end of the input, so
  // edi + start_offset + byte_length is the offset just past the literal.
  // Zero means the literal ends exactly at the end of the input, which is
  // still in bounds; only a strictly positive result overruns the input.
  __ mov(ebx, edi);
  __ add(Operand(ebx), Immediate(start_offset + byte_length));
  BranchOrBacktrack(greater, on_failure);

  // Fill the constant buffer once. ASCII narrows each character; UC16 is a
  // single block copy.
  ArraySlice constant_buffer = constants_.GetBuffer(str.length(), char_size());
  if (mode_ == ASCII) {
    for (int i = 0; i < str.length(); i++) {
      constant_buffer.at<char>(i) = static_cast<char>(str[i]);
    }
  } else {
    memcpy(constant_buffer.location<void>(),
           str.start(),
           str.length() * sizeof(uc16));
  }

  // The string compare uses esi/edi as its pointers, so save them in ebx/eax
  // and restore them afterwards. mov does not modify the flags, so the result
  // of the comparison survives the restore.
  __ mov(eax, edi);
  __ mov(ebx, esi);
  __ lea(edi, Operand(esi, edi, times_1, start_offset));
  LoadConstantBufferAddress(esi, &constant_buffer);
  __ mov(ecx, str.length());
  // NOTE(review): for an empty |str| the repeated compare runs zero times and
  // the flags tested below are left over from the add above - confirm that
  // callers never pass an empty string.
  if (mode_ == ASCII) {
    __ rep_cmpsb();
  } else {
    ASSERT(mode_ == UC16);
    __ rep_cmpsw();
  }
  __ mov(esi, ebx);
  __ mov(edi, eax);
  BranchOrBacktrack(not_equal, on_failure);
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIA32::CheckCurrentPosition(int register_index,
|
||||
Label* on_equal) {
|
||||
__ cmp(edi, register_location(register_index));
|
||||
BranchOrBacktrack(equal, on_equal);
|
||||
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIA32::CheckNotBackReference(
|
||||
int start_reg, Label* on_no_match) {
|
||||
if (ignore_case_) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
Label fallthrough;
|
||||
__ mov(eax, register_location(start_reg));
|
||||
__ mov(ecx, register_location(start_reg + 1));
|
||||
__ sub(ecx, Operand(eax)); // Length to check.
|
||||
__ j(equal, &fallthrough); // Covers the case where it's not bound (-1,-1).
|
||||
__ mov(ebx, Operand(edi));
|
||||
__ push(esi);
|
||||
__ add(edi, Operand(esi));
|
||||
__ add(esi, Operand(eax));
|
||||
if (mode_ == ASCII) {
|
||||
__ rep_cmpsb();
|
||||
} else {
|
||||
__ rep_cmpsw();
|
||||
}
|
||||
__ pop(esi);
|
||||
__ mov(edi, Operand(ebx));
|
||||
BranchOrBacktrack(not_equal, on_no_match);
|
||||
__ bind(&fallthrough);
|
||||
}
|
||||
|
||||
|
||||
// Emits code that goes to |on_not_equal| (or backtracks if it is NULL) when
// the current character, held in edx, differs from |c|.
void RegExpMacroAssemblerIA32::CheckNotCharacter(uc16 c, Label* on_not_equal) {
  __ cmp(edx, c);
  BranchOrBacktrack(not_equal, on_not_equal);
}
|
||||
|
||||
|
||||
// Emits code that computes (current character | mask) in eax and goes to
// |on_not_equal| (or backtracks if it is NULL) when the result differs from
// |c|. The current character in edx is left untouched.
void RegExpMacroAssemblerIA32::CheckNotCharacterAfterOr(uc16 c,
                                                        uc16 mask,
                                                        Label* on_not_equal) {
  __ mov(eax, Operand(edx));
  __ or_(eax, mask);
  __ cmp(eax, c);
  BranchOrBacktrack(not_equal, on_not_equal);
}
|
||||
|
||||
|
||||
// Emits code that computes ((current character - mask) | mask) in eax and
// goes to |on_not_equal| (or backtracks if it is NULL) when the result
// differs from |c|. The current character in edx is left untouched.
void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusOr(
    uc16 c,
    uc16 mask,
    Label* on_not_equal) {
  // lea performs the subtraction without modifying edx.
  __ lea(eax, Operand(edx, -mask));
  __ or_(eax, mask);
  __ cmp(eax, c);
  BranchOrBacktrack(not_equal, on_not_equal);
}
|
||||
|
||||
|
||||
// Emits code that reads a two-bit entry for (current character - start) from
// a map with four entries per byte and jumps to destinations[entry].
// NOTE: still incomplete - ecx is used both as the map address and as a copy
// of the character index (see the FIXME below), and the |half_nibble_map|
// label is not used yet.
void RegExpMacroAssemblerIA32::DispatchHalfNibbleMap(
    uc16 start,
    Label* half_nibble_map,
    const Vector<Label*>& destinations) {
  ReadCurrentChar(eax);
  __ sub(Operand(eax), Immediate(start));

  __ mov(ecx, eax);
  __ shr(eax, 2);  // Four entries per byte.
  // FIXME: ecx must hold address of map
  __ movzx_b(eax, Operand(ecx, eax, times_1, 0));
  __ and_(ecx, 0x03);       // Entry index within the byte...
  __ add(ecx, Operand(ecx));  // ...times two bits per entry.
  __ shr(eax);  // Shift right cl times

  // Decode the low two bits of eax into one of four jumps.
  Label high_bit_set, entry_3, entry_1;
  __ test(eax, Immediate(0x02));
  __ j(not_zero, &high_bit_set);
  __ test(eax, Immediate(0x01));
  __ j(not_zero, &entry_1);
  // Entry 0 (binary 00):
  __ jmp(destinations[0]);
  __ bind(&entry_1);
  // Entry 1 (binary 01):
  __ jmp(destinations[1]);
  __ bind(&high_bit_set);
  __ test(eax, Immediate(0x01));
  __ j(not_zero, &entry_3);
  // Entry 2 (binary 10):
  __ jmp(destinations[2]);
  __ bind(&entry_3);
  // Entry 3 (binary 11):
  __ jmp(destinations[3]);
}
|
||||
|
||||
|
||||
// Emits code that reads the byte-map entry for (current character - start)
// into eax; characters outside the map fall through.
// NOTE: still incomplete - the map address is never loaded into ecx and the
// jump through |destinations| is not emitted yet (see the comments below).
void RegExpMacroAssemblerIA32::DispatchByteMap(
    uc16 start,
    Label* byte_map,
    const Vector<Label*>& destinations) {
  Label fallthrough;
  ReadCurrentChar(eax);
  __ sub(Operand(eax), Immediate(start));
  __ cmp(eax, 64);  // FIXME: 64 = size of map. Found somehow??
  // Unsigned comparison: a character below |start| wraps to a large unsigned
  // value and must fall through too, instead of indexing the map with a
  // negative offset.
  __ j(above_equal, &fallthrough);
  // FIXME: ecx must hold address of map
  __ movzx_b(eax, Operand(ecx, eax, times_1, 0));
  // jump table: jump to destinations[eax];

  __ bind(&fallthrough);
}
|
||||
|
||||
|
||||
|
||||
// Emits the range check for a dispatch on the high byte of the current
// character, relative to |start|; out-of-range values fall through.
// NOTE: still incomplete - the jump through |destinations| is not emitted
// yet, and |byte_map| is unused.
void RegExpMacroAssemblerIA32::DispatchHighByteMap(
    byte start,
    Label* byte_map,
    const Vector<Label*>& destinations) {
  Label fallthrough;
  ReadCurrentChar(eax);
  __ shr(eax, 8);  // Keep only the high byte.
  __ sub(Operand(eax), Immediate(start));
  // NOTE(review): eax has already had |start| subtracted, yet the limit is
  // also reduced by |start| - confirm which of the two is intended.
  __ cmp(eax, destinations.length() - start);
  __ j(greater_equal, &fallthrough);

  // TODO(lrn) jumptable: jump to destinations[eax]
  __ bind(&fallthrough);
}
|
||||
|
||||
|
||||
// Not used by this implementation; calling it is a bug.
void RegExpMacroAssemblerIA32::EmitOrLink(Label* label) {
  UNREACHABLE();  // Has no use.
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIA32::Fail() {
|
||||
__ mov(eax, 0);
|
||||
__ jmp(&exit_label_);
|
||||
}
|
||||
|
||||
|
||||
// Finalizes code generation: emits the entry and exit sequences, then turns
// the assembled code into a Code object and returns it.
//
// The entry sequence is emitted last because it reserves one stack slot per
// regexp register, and the register count is only known once the body has
// been generated.
Handle<Object> RegExpMacroAssemblerIA32::GetCode() {
  // Finalize code - write the entry point code now we know how many
  // registers we need.

  // Entry code:
  __ bind(&entry_label_);
  __ push(esi);
  __ push(edi);
  __ enter(Immediate(num_registers_ * sizeof(uint32_t)));
  // esi = end offset; edi = start offset - end offset, i.e. the current
  // position as a negative offset from the end of the input.
  __ mov(esi, Operand(ebp, kInputEndOffset));
  __ mov(edi, Operand(ebp, kInputStartOffset));
  __ sub(edi, Operand(esi));
  // Dereference the input buffer argument and add it to esi, so that esi
  // holds the address of the end of the input.
  __ mov(edx, Operand(ebp, kInputBuffer));
  __ mov(edx, Operand(edx, 0));
  __ add(esi, Operand(edx));
  __ jmp(&start_label_);

  // Exit code:
  __ bind(&success_label_);
  // Copy captures to output.
  __ mov(ebx, Operand(ebp, kRegisterOutput));
  __ mov(ecx, Operand(ebp, kInputEndOffset));
  __ sub(ecx, Operand(ebp, kInputStartOffset));  // ecx = end - start.
  for (int i = 0; i < num_saved_registers_; i++) {
    __ mov(eax, register_location(i));
    // Registers hold offsets from the end (<= 0). Adding (end - start)
    // converts them to offsets from the start.
    // NOTE(review): the stored values are byte offsets; confirm whether UC16
    // results must be scaled to character indices for the caller.
    __ add(eax, Operand(ecx));  // Convert to index from start, not end.
    __ mov(Operand(ebx, i * sizeof(int32_t)), eax);
  }
  __ mov(eax, Immediate(1));  // Report success.

  __ bind(&exit_label_);
  __ leave();
  __ pop(edi);
  __ pop(esi);
  __ ret(0);

  CodeDesc code_desc;
  masm_->GetCode(&code_desc);
  Handle<Code> code = Factory::NewCode(code_desc,
                                       NULL,
                                       Code::ComputeFlags(Code::REGEXP),
                                       self_);
  LOG(CodeCreateEvent("RegExp", *code, "(Compiled RegExp)"));
  return Handle<Object>::cast(code);
}
|
||||
|
||||
|
||||
// Emits an unconditional jump to |to|.
void RegExpMacroAssemblerIA32::GoTo(Label* to) {
  __ jmp(to);
}
|
||||
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIA32::IfRegisterGE(int reg,
|
||||
int comparand,
|
||||
Label* if_ge) {
|
||||
__ cmp(register_location(reg), Immediate(comparand));
|
||||
BranchOrBacktrack(greater_equal, if_ge);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIA32::IfRegisterLT(int reg,
|
||||
int comparand,
|
||||
Label* if_lt) {
|
||||
__ cmp(register_location(reg), Immediate(comparand));
|
||||
BranchOrBacktrack(less, if_lt);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Identifies this macro assembler as the IA32 implementation.
RegExpMacroAssembler::IrregexpImplementation
    RegExpMacroAssemblerIA32::Implementation() {
  return kIA32Implementation;
}
|
||||
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset,
|
||||
Label* on_end_of_input) {
|
||||
ASSERT(cp_offset >= 0);
|
||||
ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
|
||||
__ cmp(edi, -cp_offset);
|
||||
BranchOrBacktrack(less_equal, on_end_of_input);
|
||||
ReadChar(edx, cp_offset);
|
||||
}
|
||||
|
||||
|
||||
// Emits code that pops the current position (edi) off the stack.
void RegExpMacroAssemblerIA32::PopCurrentPosition() {
  __ pop(edi);
}
|
||||
|
||||
|
||||
// Emits code that pops the top of the stack into regexp register
// |register_index|, after noting that the register is in use.
void RegExpMacroAssemblerIA32::PopRegister(int register_index) {
  RecordRegister(register_index);
  __ pop(register_location(register_index));
}
|
||||
|
||||
|
||||
// Emits code that pushes |label| on the stack as a backtrack target. Before
// the push, the stack pointer is compared against the stack guard limit; when
// the limit has been reached, the stack guard runtime function is called
// until the check passes.
void RegExpMacroAssemblerIA32::PushBacktrack(Label* label) {
  // Check for preemption first.
  Label no_preempt;
  Label retry_preempt;
  ExternalReference stack_limit =
      ExternalReference::address_of_stack_guard_limit();
  __ cmp(esp, Operand::StaticVariable(stack_limit));
  __ j(above, &no_preempt);

  // Save the matcher state that lives in registers across the runtime call.
  __ push(edi);  // Current position.
  __ push(edx);  // Current character.
  // Restore original edi, esi.
  __ mov(edi, Operand(ebp, kBackup_edi));
  __ mov(esi, Operand(ebp, kBackup_esi));

  __ bind(&retry_preempt);
  // simulate stack for Runtime call.
  __ push(Immediate(0));  // Dummy receiver
  __ CallRuntime(Runtime::kStackGuard, 0);
  __ cmp(esp, Operand::StaticVariable(stack_limit));
  __ j(below_equal, &retry_preempt);

  __ pop(edx);
  __ pop(edi);
  // Recompute esi (address of the end of the input) from the arguments, the
  // same way the entry code in GetCode does.
  __ mov(esi, Operand(ebp, kInputBuffer));
  __ mov(esi, Operand(esi, 0));
  __ add(esi, Operand(ebp, kInputEndOffset));

  __ bind(&no_preempt);

  __ push(label, RelocInfo::NONE);
}
|
||||
|
||||
|
||||
// Emits code that pushes the current position (edi) on the stack.
void RegExpMacroAssemblerIA32::PushCurrentPosition() {
  __ push(edi);
}
|
||||
|
||||
|
||||
// Emits code that pushes the value of regexp register |register_index| on the
// stack.
void RegExpMacroAssemblerIA32::PushRegister(int register_index) {
  __ push(register_location(register_index));
}
|
||||
|
||||
|
||||
// Emits code that loads the current position (edi) from regexp register
// |reg|.
void RegExpMacroAssemblerIA32::ReadCurrentPositionFromRegister(int reg) {
  __ mov(edi, register_location(reg));
}
|
||||
|
||||
|
||||
// Emits code that loads the stack pointer (esp) from regexp register |reg|.
void RegExpMacroAssemblerIA32::ReadStackPointerFromRegister(int reg) {
  __ mov(esp, register_location(reg));
}
|
||||
|
||||
|
||||
// Emits code that stores the constant |to| in regexp register
// |register_index|, after noting that the register is in use.
void RegExpMacroAssemblerIA32::SetRegister(int register_index, int to) {
  RecordRegister(register_index);
  __ mov(register_location(register_index), Immediate(to));
}
|
||||
|
||||
|
||||
// Emits a jump to the success sequence emitted by GetCode.
void RegExpMacroAssemblerIA32::Succeed() {
  __ jmp(&success_label_);
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(
|
||||
int register_index) {
|
||||
__ mov(register_location(register_index), edi);
|
||||
}
|
||||
|
||||
void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) {
|
||||
__ mov(register_location(reg), esp);
|
||||
}
|
||||
|
||||
|
||||
// Private methods:
|
||||
|
||||
// Returns the ebp-relative operand for regexp register |register_index|.
// Register 0 is the slot directly below ebp; higher indices are at lower
// addresses.
Operand RegExpMacroAssemblerIA32::register_location(
    int register_index) {
  ASSERT(register_index < (1<<30));
  return Operand(ebp, -((register_index + 1) * sizeof(uint32_t)));
}
|
||||
|
||||
|
||||
// Returns the size in bytes of one input character. The numeric value of the
// mode is used directly as the character size.
size_t RegExpMacroAssemblerIA32::char_size() {
  return static_cast<size_t>(mode_);
}
|
||||
|
||||
|
||||
// Emits a branch to |to| taken when |condition| holds. A NULL |to| means
// "backtrack" instead of "jump", and a negative |condition| means the branch
// is unconditional.
void RegExpMacroAssemblerIA32::BranchOrBacktrack(Condition condition,
                                                 Label* to) {
  const bool unconditional = condition < 0;  // No condition
  if (to != NULL) {
    if (unconditional) {
      __ jmp(to);
    } else {
      __ j(condition, to);
    }
    return;
  }
  if (unconditional) {
    Backtrack();
    return;
  }
  // Conditional backtrack: skip over the backtrack code when the condition
  // does not hold.
  Label skip;
  __ j(NegateCondition(condition), &skip);
  Backtrack();
  __ bind(&skip);
}
|
||||
|
||||
|
||||
// Emits code that case-canonicalizes the character in |reg|. In ASCII mode,
// 'a'..'z' are mapped to 'A'..'Z' and every other value is left alone. UC16
// mode emits nothing yet.
void RegExpMacroAssemblerIA32::Canonicalize(Register reg) {
  if (mode_ != ASCII) {
    ASSERT(mode_ == UC16);
    // TODO(lrn): Use some tables.
    return;
  }
  Label end;
  __ cmp(Operand(reg), Immediate('a'));
  __ j(below, &end);
  __ cmp(Operand(reg), Immediate('z'));
  __ j(above, &end);
  __ sub(Operand(reg), Immediate('a' - 'A'));
  __ bind(&end);
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIA32::RecordRegister(int register_index) {
|
||||
if (register_index >= num_registers_) {
|
||||
num_registers_ = register_index + 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Emits code that loads, zero-extended, the character |offset| characters
// after the current position into |destination|. The address is
// esi + edi + offset scaled to bytes. No bounds check is emitted here.
void RegExpMacroAssemblerIA32::ReadChar(Register destination, int offset) {
  if (mode_ == ASCII) {
    __ movzx_b(destination, Operand(esi, edi, times_1, offset));
  } else {
    ASSERT(mode_ == UC16);
    __ movzx_w(destination, Operand(esi, edi, times_1, offset * 2));
  }
}
|
||||
|
||||
|
||||
// Emits code that copies the current character, held in edx, into
// |destination|.
void RegExpMacroAssemblerIA32::ReadCurrentChar(Register destination) {
  __ mov(destination, edx);
}
|
||||
|
||||
|
||||
// Emits code that loads the address of constant buffer |buffer| into |reg|:
// the buffer's backing array plus the slice's base offset. Uses no register
// other than |reg|.
void RegExpMacroAssemblerIA32::LoadConstantBufferAddress(Register reg,
                                                         ArraySlice* buffer) {
  __ mov(reg, buffer->array());
  __ add(Operand(reg), Immediate(buffer->base_offset()));
}
|
||||
|
||||
#undef __
|
||||
}}
|
162
src/regexp-macro-assembler-ia32.h
Normal file
162
src/regexp-macro-assembler-ia32.h
Normal file
@ -0,0 +1,162 @@
|
||||
// Copyright 2008 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef REGEXP_MACRO_ASSEMBLER_IA32_H_
|
||||
#define REGEXP_MACRO_ASSEMBLER_IA32_H_
|
||||
|
||||
#if !(defined __arm__ || defined __thumb__ || defined ARM)
|
||||
|
||||
#include "regexp-macro-assembler.h"
|
||||
#include "macro-assembler-ia32.h"
|
||||
|
||||
namespace v8 { namespace internal {
|
||||
|
||||
class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
|
||||
public:
|
||||
enum Mode {ASCII = 1, UC16 = 2};
|
||||
RegExpMacroAssemblerIA32(Mode mode, int registers_to_save, bool ignore_case);
|
||||
virtual ~RegExpMacroAssemblerIA32();
|
||||
virtual void AdvanceCurrentPosition(int by);
|
||||
virtual void AdvanceRegister(int reg, int by);
|
||||
virtual void Backtrack();
|
||||
virtual void Bind(Label* label);
|
||||
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
|
||||
virtual void CheckCharacter(uc16 c, Label* on_equal);
|
||||
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
|
||||
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
|
||||
virtual void CheckCharacters(Vector<const uc16> str,
|
||||
int cp_offset,
|
||||
Label* on_failure);
|
||||
virtual void CheckCurrentPosition(int register_index, Label* on_equal);
|
||||
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
|
||||
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
|
||||
virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
|
||||
virtual void CheckNotCharacterAfterMinusOr(uc16 c,
|
||||
uc16 mask,
|
||||
Label* on_not_equal);
|
||||
virtual void DispatchByteMap(uc16 start,
|
||||
Label* byte_map,
|
||||
const Vector<Label*>& destinations);
|
||||
virtual void DispatchHalfNibbleMap(uc16 start,
|
||||
Label* half_nibble_map,
|
||||
const Vector<Label*>& destinations);
|
||||
virtual void DispatchHighByteMap(byte start,
|
||||
Label* byte_map,
|
||||
const Vector<Label*>& destinations);
|
||||
virtual void EmitOrLink(Label* label);
|
||||
virtual void Fail();
|
||||
virtual Handle<Object> GetCode();
|
||||
virtual void GoTo(Label* label);
|
||||
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
|
||||
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
|
||||
virtual IrregexpImplementation Implementation();
|
||||
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
|
||||
virtual void PopCurrentPosition();
|
||||
virtual void PopRegister(int register_index);
|
||||
virtual void PushBacktrack(Label* label);
|
||||
virtual void PushCurrentPosition();
|
||||
virtual void PushRegister(int register_index);
|
||||
virtual void ReadCurrentPositionFromRegister(int reg);
|
||||
virtual void ReadStackPointerFromRegister(int reg);
|
||||
virtual void SetRegister(int register_index, int to);
|
||||
virtual void Succeed();
|
||||
virtual void WriteCurrentPositionToRegister(int reg);
|
||||
virtual void WriteStackPointerToRegister(int reg);
|
||||
private:
|
||||
// Offsets from ebp of arguments to function.
|
||||
static const int kBackup_edi = 1 * sizeof(uint32_t);
|
||||
static const int kBackup_esi= 2 * sizeof(uint32_t);
|
||||
static const int kInputBuffer = 4 * sizeof(uint32_t);
|
||||
static const int kInputStartOffset = 5 * sizeof(uint32_t);
|
||||
static const int kInputEndOffset = 6 * sizeof(uint32_t);
|
||||
static const int kRegisterOutput = 7 * sizeof(uint32_t);
|
||||
|
||||
// The ebp-relative location of a regexp register.
|
||||
Operand register_location(int register_index);
|
||||
|
||||
// Whether to implement case-insensitive matching.
|
||||
bool ignore_case();
|
||||
|
||||
// Byte size of chars in the string to match (decided by the Mode argument)
|
||||
size_t char_size();
|
||||
|
||||
// Records that a register is used. At the end, we need the number of
|
||||
// registers used.
|
||||
void RecordRegister(int register_index);
|
||||
|
||||
// Equivalent to a conditional branch to the label, unless the label
|
||||
// is NULL, in which case it is a conditional Backtrack.
|
||||
void BranchOrBacktrack(Condition condition, Label* to);
|
||||
|
||||
// Generate code to perform case-canonicalization on the register.
|
||||
void Canonicalize(Register register);
|
||||
|
||||
// Read a character from input at the given offset from the current
|
||||
// position.
|
||||
void ReadChar(Register destination, int offset);
|
||||
|
||||
// Load the address of a "constant buffer" (a slice of a byte array)
|
||||
// into a register. The address is computed from the ByteArray* address
|
||||
// and an offset. Uses no extra registers.
|
||||
void LoadConstantBufferAddress(Register reg, ArraySlice* buffer);
|
||||
|
||||
// Read the current character into the destination register.
|
||||
void ReadCurrentChar(Register destination);
|
||||
|
||||
// Initial size of code buffer.
|
||||
static const size_t kRegExpCodeSize = 1024;
|
||||
// Initial size of constant buffers allocated during compilation.
|
||||
static const int kRegExpConstantsSize = 256;
|
||||
// Only unroll loops up to this length.
|
||||
static const int kMaxInlineStringTests = 8;
|
||||
// Special "character" marking end of input.
|
||||
static const uint32_t kEndOfInput = ~0;
|
||||
|
||||
MacroAssembler* masm_;
|
||||
ByteArrayProvider constants_;
|
||||
// Which mode to generate code for (ASCII or UTF16).
|
||||
Mode mode_;
|
||||
// One greater than maximal register index actually used.
|
||||
int num_registers_;
|
||||
// Number of registers to output at the end (the saved registers
|
||||
// are always 0..num_saved_registers_-1)
|
||||
int num_saved_registers_;
|
||||
// Whether to generate code that is case-insensitive. Only relevant for
|
||||
// back-references.
|
||||
bool ignore_case_;
|
||||
Label entry_label_;
|
||||
Label start_label_;
|
||||
Label success_label_;
|
||||
Label exit_label_;
|
||||
// Handle used to represent the generated code object itself.
|
||||
Handle<Object> self_;
|
||||
};
|
||||
}}
|
||||
|
||||
#endif // !ARM
|
||||
|
||||
#endif /* REGEXP_MACRO_ASSEMBLER_IA32_H_ */
|
266
src/regexp-macro-assembler-irregexp.cc
Normal file
266
src/regexp-macro-assembler-irregexp.cc
Normal file
@ -0,0 +1,266 @@
|
||||
// Copyright 2008 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include "v8.h"
|
||||
#include "ast.h"
|
||||
#include "bytecodes-irregexp.h"
|
||||
#include "assembler-irregexp.h"
|
||||
#include "assembler-irregexp-inl.h"
|
||||
#include "regexp-macro-assembler.h"
|
||||
#include "regexp-macro-assembler-irregexp.h"
|
||||
|
||||
|
||||
namespace v8 { namespace internal {
|
||||
|
||||
|
||||
// Nothing to release here.
RegExpMacroAssemblerIrregexp::~RegExpMacroAssemblerIrregexp() {}
|
||||
|
||||
|
||||
// Identifies this macro assembler as the bytecode implementation.
RegExpMacroAssemblerIrregexp::IrregexpImplementation
    RegExpMacroAssemblerIrregexp::Implementation() {
  return kBytecodeImplementation;
}
|
||||
|
||||
|
||||
// Forwards to the bytecode assembler's Bind.
void RegExpMacroAssemblerIrregexp::Bind(Label* l) {
  assembler_->Bind(l);
}
|
||||
|
||||
|
||||
// Forwards to the bytecode assembler's EmitOrLink.
void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) {
  assembler_->EmitOrLink(l);
}
|
||||
|
||||
|
||||
// Forwards to the bytecode assembler's PopRegister.
void RegExpMacroAssemblerIrregexp::PopRegister(int register_index) {
  assembler_->PopRegister(register_index);
}
|
||||
|
||||
|
||||
// Forwards to the bytecode assembler's PushRegister.
void RegExpMacroAssemblerIrregexp::PushRegister(int register_index) {
  assembler_->PushRegister(register_index);
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister(
|
||||
int register_index) {
|
||||
assembler_->WriteCurrentPositionToRegister(register_index);
|
||||
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIrregexp::ReadCurrentPositionFromRegister(
|
||||
int register_index) {
|
||||
assembler_->ReadCurrentPositionFromRegister(register_index);
|
||||
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIrregexp::WriteStackPointerToRegister(
|
||||
int register_index) {
|
||||
assembler_->WriteStackPointerToRegister(register_index);
|
||||
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIrregexp::ReadStackPointerFromRegister(
|
||||
int register_index) {
|
||||
assembler_->ReadStackPointerFromRegister(register_index);
|
||||
}
|
||||
|
||||
|
||||
// Forwards to the bytecode assembler's SetRegister.
void RegExpMacroAssemblerIrregexp::SetRegister(int register_index, int to) {
  assembler_->SetRegister(register_index, to);
}
|
||||
|
||||
|
||||
// Forwards to the bytecode assembler's AdvanceRegister.
void RegExpMacroAssemblerIrregexp::AdvanceRegister(int register_index, int by) {
  assembler_->AdvanceRegister(register_index, by);
}
|
||||
|
||||
|
||||
// Forwards to the bytecode assembler's PopCurrentPosition.
void RegExpMacroAssemblerIrregexp::PopCurrentPosition() {
  assembler_->PopCurrentPosition();
}
|
||||
|
||||
|
||||
// Forwards to the bytecode assembler's PushCurrentPosition.
void RegExpMacroAssemblerIrregexp::PushCurrentPosition() {
  assembler_->PushCurrentPosition();
}
|
||||
|
||||
|
||||
// Backtracking is expressed as the bytecode assembler's PopBacktrack.
void RegExpMacroAssemblerIrregexp::Backtrack() {
  assembler_->PopBacktrack();
}
|
||||
|
||||
|
||||
// Forwards to the bytecode assembler's GoTo.
void RegExpMacroAssemblerIrregexp::GoTo(Label* l) {
  assembler_->GoTo(l);
}
|
||||
|
||||
|
||||
// Forwards to the bytecode assembler's PushBacktrack.
void RegExpMacroAssemblerIrregexp::PushBacktrack(Label* l) {
  assembler_->PushBacktrack(l);
}
|
||||
|
||||
|
||||
// Forwards to the bytecode assembler's Succeed.
void RegExpMacroAssemblerIrregexp::Succeed() {
  assembler_->Succeed();
}
|
||||
|
||||
|
||||
// Forwards to the bytecode assembler's Fail.
void RegExpMacroAssemblerIrregexp::Fail() {
  assembler_->Fail();
}
|
||||
|
||||
|
||||
// Advancing the current position is expressed as the bytecode assembler's
// AdvanceCP.
void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) {
  assembler_->AdvanceCP(by);
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIrregexp::CheckCurrentPosition(
|
||||
int register_index,
|
||||
Label* on_equal) {
|
||||
// TODO(erikcorry): Implement.
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset,
|
||||
Label* on_failure) {
|
||||
assembler_->LoadCurrentChar(cp_offset, on_failure);
|
||||
}
|
||||
|
||||
|
||||
// Forwards to the bytecode assembler's CheckCharacterLT.
void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit,
                                                    Label* on_less) {
  assembler_->CheckCharacterLT(limit, on_less);
}
|
||||
|
||||
|
||||
// Forwards to the bytecode assembler's CheckCharacterGT.
void RegExpMacroAssemblerIrregexp::CheckCharacterGT(uc16 limit,
                                                    Label* on_greater) {
  assembler_->CheckCharacterGT(limit, on_greater);
}
|
||||
|
||||
|
||||
// Forwards to the bytecode assembler's CheckCharacter.
void RegExpMacroAssemblerIrregexp::CheckCharacter(uc16 c, Label* on_equal) {
  assembler_->CheckCharacter(c, on_equal);
}
|
||||
|
||||
|
||||
// Forwards to the bytecode assembler's CheckNotCharacter.
void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uc16 c,
                                                     Label* on_not_equal) {
  assembler_->CheckNotCharacter(c, on_not_equal);
}
|
||||
|
||||
|
||||
// Expressed as the bytecode assembler's OrThenCheckNotCharacter.
void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterOr(
    uc16 c,
    uc16 mask,
    Label* on_not_equal) {
  assembler_->OrThenCheckNotCharacter(c, mask, on_not_equal);
}
|
||||
|
||||
|
||||
// Expressed as the bytecode assembler's MinusOrThenCheckNotCharacter.
void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusOr(
    uc16 c,
    uc16 mask,
    Label* on_not_equal) {
  assembler_->MinusOrThenCheckNotCharacter(c, mask, on_not_equal);
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg,
|
||||
Label* on_not_equal) {
|
||||
assembler_->CheckNotBackReference(start_reg, on_not_equal);
|
||||
}
|
||||
|
||||
|
||||
// A bitmap check is expressed as the bytecode assembler's LookupMap1.
void RegExpMacroAssemblerIrregexp::CheckBitmap(uc16 start,
                                               Label* bitmap,
                                               Label* on_zero) {
  assembler_->LookupMap1(start, bitmap, on_zero);
}
|
||||
|
||||
|
||||
// A half-nibble dispatch is expressed as the bytecode assembler's LookupMap2.
void RegExpMacroAssemblerIrregexp::DispatchHalfNibbleMap(
    uc16 start,
    Label* half_nibble_map,
    const Vector<Label*>& table) {
  assembler_->LookupMap2(start, half_nibble_map, table);
}
|
||||
|
||||
|
||||
// A byte-map dispatch is expressed as the bytecode assembler's LookupMap8.
void RegExpMacroAssemblerIrregexp::DispatchByteMap(
    uc16 start,
    Label* byte_map,
    const Vector<Label*>& table) {
  assembler_->LookupMap8(start, byte_map, table);
}
|
||||
|
||||
|
||||
// A high-byte dispatch is expressed as the bytecode assembler's
// LookupHighMap8.
void RegExpMacroAssemblerIrregexp::DispatchHighByteMap(
    byte start,
    Label* byte_map,
    const Vector<Label*>& table) {
  assembler_->LookupHighMap8(start, byte_map, table);
}
|
||||
|
||||
|
||||
// Emits, for each character of |str| from last to first, a load of the input
// character at |cp_offset| + index followed by a check against the expected
// character. Both the load and the check go to |on_failure|.
void RegExpMacroAssemblerIrregexp::CheckCharacters(
    Vector<const uc16> str,
    int cp_offset,
    Label* on_failure) {
  for (int index = str.length(); index-- > 0; ) {
    assembler_->LoadCurrentChar(cp_offset + index, on_failure);
    assembler_->CheckNotCharacter(str[index], on_failure);
  }
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIrregexp::IfRegisterLT(int register_index,
|
||||
int comparand,
|
||||
Label* if_less_than) {
|
||||
ASSERT(comparand >= 0 && comparand <= 65535);
|
||||
assembler_->CheckRegisterLT(register_index, comparand, if_less_than);
|
||||
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index,
|
||||
int comparand,
|
||||
Label* if_greater_or_equal) {
|
||||
ASSERT(comparand >= 0 && comparand <= 65535);
|
||||
assembler_->CheckRegisterGE(register_index, comparand, if_greater_or_equal);
|
||||
}
|
||||
|
||||
|
||||
// Allocates a ByteArray of the assembler's current length, copies the
// assembled bytecode into it and returns it.
Handle<Object> RegExpMacroAssemblerIrregexp::GetCode() {
  Handle<ByteArray> array = Factory::NewByteArray(assembler_->length());
  assembler_->Copy(array->GetDataStartAddress());
  return array;
}
|
||||
|
||||
} } // namespace v8::internal
|
93
src/regexp-macro-assembler-irregexp.h
Normal file
93
src/regexp-macro-assembler-irregexp.h
Normal file
@ -0,0 +1,93 @@
|
||||
// Copyright 2008 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef V8_REGEXP_MACRO_ASSEMBLER_IRREGEXP_H_
|
||||
#define V8_REGEXP_MACRO_ASSEMBLER_IRREGEXP_H_
|
||||
|
||||
namespace v8 { namespace internal {
|
||||
|
||||
|
||||
class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
|
||||
public:
|
||||
explicit RegExpMacroAssemblerIrregexp(IrregexpAssembler* assembler)
|
||||
: assembler_(assembler) {
|
||||
}
|
||||
virtual ~RegExpMacroAssemblerIrregexp();
|
||||
virtual void Bind(Label* label);
|
||||
virtual void EmitOrLink(Label* label);
|
||||
virtual void AdvanceCurrentPosition(int by); // Signed cp change.
|
||||
virtual void PopCurrentPosition();
|
||||
virtual void PushCurrentPosition();
|
||||
virtual void Backtrack();
|
||||
virtual void GoTo(Label* label);
|
||||
virtual void PushBacktrack(Label* label);
|
||||
virtual void Succeed();
|
||||
virtual void Fail();
|
||||
virtual void PopRegister(int register_index);
|
||||
virtual void PushRegister(int register_index);
|
||||
virtual void AdvanceRegister(int reg, int by); // r[reg] += by.
|
||||
virtual void SetRegister(int register_index, int to);
|
||||
virtual void WriteCurrentPositionToRegister(int reg);
|
||||
virtual void ReadCurrentPositionFromRegister(int reg);
|
||||
virtual void WriteStackPointerToRegister(int reg);
|
||||
virtual void ReadStackPointerFromRegister(int reg);
|
||||
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
|
||||
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
|
||||
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
|
||||
virtual void CheckCharacter(uc16 c, Label* on_equal);
|
||||
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
|
||||
virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
|
||||
virtual void CheckNotCharacterAfterMinusOr(uc16 c,
|
||||
uc16 mask,
|
||||
Label* on_not_equal);
|
||||
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
|
||||
virtual void CheckCharacters(Vector<const uc16> str,
|
||||
int cp_offset,
|
||||
Label* on_failure);
|
||||
virtual void CheckCurrentPosition(int register_index, Label* on_equal);
|
||||
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
|
||||
virtual void DispatchHalfNibbleMap(uc16 start,
|
||||
Label* half_nibble_map,
|
||||
const Vector<Label*>& destinations);
|
||||
virtual void DispatchByteMap(uc16 start,
|
||||
Label* byte_map,
|
||||
const Vector<Label*>& destinations);
|
||||
virtual void DispatchHighByteMap(byte start,
|
||||
Label* byte_map,
|
||||
const Vector<Label*>& destinations);
|
||||
virtual void IfRegisterLT(int register_index, int comparand, Label* if_lt);
|
||||
virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge);
|
||||
|
||||
virtual IrregexpImplementation Implementation();
|
||||
virtual Handle<Object> GetCode();
|
||||
private:
|
||||
IrregexpAssembler* assembler_;
|
||||
};
|
||||
|
||||
} } // namespace v8::internal
|
||||
|
||||
#endif // V8_REGEXP_MACRO_ASSEMBLER_IRREGEXP_H_
|
77
src/regexp-macro-assembler.cc
Normal file
77
src/regexp-macro-assembler.cc
Normal file
@ -0,0 +1,77 @@
|
||||
// Copyright 2008 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <string.h>
|
||||
#include "v8.h"
|
||||
#include "ast.h"
|
||||
#include "assembler.h"
|
||||
#include "regexp-macro-assembler.h"
|
||||
|
||||
namespace v8 { namespace internal {
|
||||
|
||||
// The base class has no state of its own to initialize.
RegExpMacroAssembler::RegExpMacroAssembler() {}
|
||||
|
||||
|
||||
// Out-of-line definition of the virtual destructor; there is nothing to
// release.
RegExpMacroAssembler::~RegExpMacroAssembler() {}
|
||||
|
||||
|
||||
// Records the size to use for shared byte arrays. No array is allocated
// here: the free offset starts out equal to the array size, so the first
// GetBuffer() request finds no room left and takes its allocation path.
ByteArrayProvider::ByteArrayProvider(unsigned int initial_size)
    : byte_array_size_(initial_size),
      current_byte_array_(),
      current_byte_array_free_offset_(initial_size) {
}
|
||||
|
||||
|
||||
// Returns a slice with room for "size" elements of "elem_size" bytes each.
// The slice's offset is a multiple of elem_size.
//
// Requests are carved out of the current shared byte array while they fit.
// When a request does not fit, it either gets a dedicated array of its own
// (if it is larger than half the shared array size) or a fresh shared array
// is allocated and the request is placed at its start.
ArraySlice ByteArrayProvider::GetBuffer(unsigned int size,
                                        unsigned int elem_size) {
  ASSERT(size > 0);
  ASSERT(elem_size > 0);  // elem_size is used as a divisor below.
  // Widen before multiplying so the product is computed in size_t.
  size_t byte_size = static_cast<size_t>(size) * elem_size;
  int free_offset = current_byte_array_free_offset_;
  // Round the free offset up to the next multiple of elem_size.
  free_offset += elem_size - 1;
  free_offset = free_offset - (free_offset % elem_size);

  if (free_offset + byte_size > byte_array_size_) {
    if (byte_size > (byte_array_size_ / 2)) {
      // Large request: hand out a dedicated array and leave the shared array
      // and its free offset untouched.
      Handle<ByteArray> solo_buffer(Factory::NewByteArray(byte_size, TENURED));
      return ArraySlice(solo_buffer, 0);
    }
    current_byte_array_ = Factory::NewByteArray(byte_array_size_, TENURED);
    free_offset = 0;
  }
  // Reserve the slice in bytes, not in elements; advancing by the element
  // count would let the next slice overlap this one whenever elem_size > 1.
  current_byte_array_free_offset_ = static_cast<int>(free_offset + byte_size);
  return ArraySlice(current_byte_array_, free_offset);
}
|
||||
|
||||
template <typename T>
|
||||
ArraySlice ByteArrayProvider::GetBuffer(Vector<T> values) {
|
||||
ArraySlice slice = GetBuffer(values.length(), sizeof(T));
|
||||
memcpy(slice.location<void>(), values.start(), values.length() * sizeof(T));
|
||||
return slice;
|
||||
}
|
||||
} }
|
181
src/regexp-macro-assembler.h
Normal file
181
src/regexp-macro-assembler.h
Normal file
@ -0,0 +1,181 @@
|
||||
// Copyright 2008 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef V8_REGEXP_MACRO_ASSEMBLER_H_
|
||||
#define V8_REGEXP_MACRO_ASSEMBLER_H_
|
||||
|
||||
namespace v8 { namespace internal {
|
||||
|
||||
|
||||
struct DisjunctDecisionRow {
|
||||
RegExpCharacterClass cc;
|
||||
Label* on_match;
|
||||
};
|
||||
|
||||
|
||||
class RegExpMacroAssembler {
|
||||
public:
|
||||
enum IrregexpImplementation {
|
||||
kIA32Implementation,
|
||||
kARMImplementation,
|
||||
kBytecodeImplementation};
|
||||
|
||||
RegExpMacroAssembler();
|
||||
virtual ~RegExpMacroAssembler();
|
||||
virtual void AdvanceCurrentPosition(int by) = 0; // Signed cp change.
|
||||
virtual void AdvanceRegister(int reg, int by) = 0; // r[reg] += by.
|
||||
virtual void Backtrack() = 0;
|
||||
virtual void Bind(Label* label) = 0;
|
||||
// Check the current character against a bitmap. The range of the current
|
||||
// character must be from start to start + length_of_bitmap_in_bits.
|
||||
virtual void CheckBitmap(
|
||||
uc16 start, // The bitmap is indexed from this character.
|
||||
Label* bitmap, // Where the bitmap is emitted.
|
||||
Label* on_zero) = 0; // Where to go if the bit is 0. Fall through on 1.
|
||||
// Dispatch after looking the current character up in a 2-bits-per-entry
|
||||
// map. The destinations vector has up to 4 labels.
|
||||
virtual void CheckCharacter(uc16 c, Label* on_equal) = 0;
|
||||
virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
|
||||
virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
|
||||
// Check the current character for a match with a literal string. If we
|
||||
// fail to match then goto the on_failure label. End of input always
|
||||
// matches. If the label is NULL then we should pop a backtrack address off
|
||||
// the stack abnd go to that.
|
||||
virtual void CheckCharacters(
|
||||
Vector<const uc16> str,
|
||||
int cp_offset,
|
||||
Label* on_failure) = 0;
|
||||
// Check the current input position against a register. If the register is
|
||||
// equal to the current position then go to the label. If the label is NULL
|
||||
// then backtrack instead.
|
||||
virtual void CheckCurrentPosition(
|
||||
int register_index,
|
||||
Label* on_equal) = 0;
|
||||
virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;
|
||||
// Check the current character for a match with a literal character. If we
|
||||
// fail to match then goto the on_failure label. End of input always
|
||||
// matches. If the label is NULL then we should pop a backtrack address off
|
||||
// the stack and go to that.
|
||||
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal) = 0;
|
||||
// Bitwise or the current character with the given constant and then
|
||||
// check for a match with c.
|
||||
virtual void CheckNotCharacterAfterOr(uc16 c,
|
||||
uc16 or_with,
|
||||
Label* on_not_equal) = 0;
|
||||
// Subtract a constant from the current character, then or with the given
|
||||
// constant and then check for a match with c.
|
||||
virtual void CheckNotCharacterAfterMinusOr(uc16 c,
|
||||
uc16 minus_then_or_with,
|
||||
Label* on_not_equal) = 0;
|
||||
// Dispatch after looking the current character up in a byte map. The
|
||||
// destinations vector has up to 256 labels.
|
||||
virtual void DispatchByteMap(
|
||||
uc16 start,
|
||||
Label* byte_map,
|
||||
const Vector<Label*>& destinations) = 0;
|
||||
virtual void DispatchHalfNibbleMap(
|
||||
uc16 start,
|
||||
Label* half_nibble_map,
|
||||
const Vector<Label*>& destinations) = 0;
|
||||
// Dispatch after looking the high byte of the current character up in a byte
|
||||
// map. The destinations vector has up to 256 labels.
|
||||
virtual void DispatchHighByteMap(
|
||||
byte start,
|
||||
Label* byte_map,
|
||||
const Vector<Label*>& destinations) = 0;
|
||||
virtual void EmitOrLink(Label* label) = 0;
|
||||
virtual void Fail() = 0;
|
||||
virtual Handle<Object> GetCode() = 0;
|
||||
virtual void GoTo(Label* label) = 0;
|
||||
// Check whether a register is >= a given constant and go to a label if it
|
||||
// is. Backtracks instead if the label is NULL.
|
||||
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge) = 0;
|
||||
// Check whether a register is < a given constant and go to a label if it is.
|
||||
// Backtracks instead if the label is NULL.
|
||||
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
|
||||
virtual IrregexpImplementation Implementation() = 0;
|
||||
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input) = 0;
|
||||
virtual void PopCurrentPosition() = 0;
|
||||
virtual void PopRegister(int register_index) = 0;
|
||||
virtual void PushBacktrack(Label* label) = 0;
|
||||
virtual void PushCurrentPosition() = 0;
|
||||
virtual void PushRegister(int register_index) = 0;
|
||||
virtual void ReadCurrentPositionFromRegister(int reg) = 0;
|
||||
virtual void ReadStackPointerFromRegister(int reg) = 0;
|
||||
virtual void SetRegister(int register_index, int to) = 0;
|
||||
virtual void Succeed() = 0;
|
||||
virtual void WriteCurrentPositionToRegister(int reg) = 0;
|
||||
virtual void WriteStackPointerToRegister(int reg) = 0;
|
||||
|
||||
private:
|
||||
};
|
||||
|
||||
|
||||
struct ArraySlice {
|
||||
public:
|
||||
ArraySlice(Handle<ByteArray> array, size_t offset)
|
||||
: array_(array), offset_(offset) {}
|
||||
Handle<ByteArray> array() { return array_; }
|
||||
// Offset in the byte array data.
|
||||
size_t offset() { return offset_; }
|
||||
// Offset from the ByteArray pointer.
|
||||
size_t base_offset() {
|
||||
return ByteArray::kHeaderSize - kHeapObjectTag + offset_;
|
||||
}
|
||||
template <typename T>
|
||||
T* location() {
|
||||
return reinterpret_cast<T*>(array_->GetDataStartAddress() + offset_);
|
||||
}
|
||||
template <typename T>
|
||||
T& at(int idx) {
|
||||
return reinterpret_cast<T*>(array_->GetDataStartAddress() + offset_)[idx];
|
||||
}
|
||||
private:
|
||||
Handle<ByteArray> array_;
|
||||
size_t offset_;
|
||||
};
|
||||
|
||||
|
||||
class ByteArrayProvider {
|
||||
public:
|
||||
explicit ByteArrayProvider(unsigned int initial_size);
|
||||
// Provides a place to put "size" elements of size "element_size".
|
||||
// The information can be stored in the provided ByteArray at the "offset".
|
||||
// The offset is aligned to the element size.
|
||||
ArraySlice GetBuffer(unsigned int size,
|
||||
unsigned int element_size);
|
||||
template <typename T>
|
||||
ArraySlice GetBuffer(Vector<T> values);
|
||||
private:
|
||||
size_t byte_array_size_;
|
||||
Handle<ByteArray> current_byte_array_;
|
||||
int current_byte_array_free_offset_;
|
||||
};
|
||||
|
||||
} } // namespace v8::internal
|
||||
|
||||
#endif // V8_REGEXP_MACRO_ASSEMBLER_H_
|
@ -288,7 +288,7 @@ static Object* Runtime_IsConstructCall(Arguments args) {
|
||||
|
||||
|
||||
static Object* Runtime_RegExpCompile(Arguments args) {
|
||||
HandleScope scope; // create a new handle scope
|
||||
HandleScope scope;
|
||||
ASSERT(args.length() == 3);
|
||||
CONVERT_CHECKED(JSRegExp, raw_re, args[0]);
|
||||
Handle<JSRegExp> re(raw_re);
|
||||
@ -786,7 +786,9 @@ static Object* Runtime_RegExpExec(Arguments args) {
|
||||
Handle<String> subject(raw_subject);
|
||||
Handle<Object> index(args[2]);
|
||||
ASSERT(index->IsNumber());
|
||||
return *RegExpImpl::Exec(regexp, subject, index);
|
||||
Handle<Object> result = RegExpImpl::Exec(regexp, subject, index);
|
||||
if (result.is_null()) return Failure::Exception();
|
||||
return *result;
|
||||
}
|
||||
|
||||
|
||||
@ -797,7 +799,9 @@ static Object* Runtime_RegExpExecGlobal(Arguments args) {
|
||||
Handle<JSRegExp> regexp(raw_regexp);
|
||||
CONVERT_CHECKED(String, raw_subject, args[1]);
|
||||
Handle<String> subject(raw_subject);
|
||||
return *RegExpImpl::ExecGlobal(regexp, subject);
|
||||
Handle<Object> result = RegExpImpl::ExecGlobal(regexp, subject);
|
||||
if (result.is_null()) return Failure::Exception();
|
||||
return *result;
|
||||
}
|
||||
|
||||
|
||||
@ -2444,7 +2448,7 @@ static Object* ConvertCase(Arguments args,
|
||||
// in the buffer
|
||||
Access<StringInputBuffer> buffer(&string_input_buffer);
|
||||
buffer->Reset(s);
|
||||
unibrow::uchar chars[unibrow::kMaxCaseConvertedSize];
|
||||
unibrow::uchar chars[Converter::kMaxWidth];
|
||||
int i = 0;
|
||||
// We can assume that the string is not empty
|
||||
uc32 current = buffer->GetNext();
|
||||
|
@ -93,13 +93,13 @@ static bool IsControlChar(char c) {
|
||||
}
|
||||
|
||||
|
||||
void StringStream::Add(const char* format, Vector<FmtElm> elms) {
|
||||
void StringStream::Add(Vector<const char> format, Vector<FmtElm> elms) {
|
||||
// If we already ran out of space then return immediately.
|
||||
if (space() == 0)
|
||||
return;
|
||||
int offset = 0;
|
||||
int elm = 0;
|
||||
while (format[offset] != '\0') {
|
||||
while (offset < format.length()) {
|
||||
if (format[offset] != '%' || elm == elms.length()) {
|
||||
Put(format[offset]);
|
||||
offset++;
|
||||
@ -111,12 +111,11 @@ void StringStream::Add(const char* format, Vector<FmtElm> elms) {
|
||||
// Skip over the whole control character sequence until the
|
||||
// format element type
|
||||
temp[format_length++] = format[offset++];
|
||||
// '\0' is not a control character so we don't have to
|
||||
// explicitly check for the end of the string
|
||||
while (IsControlChar(format[offset]))
|
||||
while (offset < format.length() && IsControlChar(format[offset]))
|
||||
temp[format_length++] = format[offset++];
|
||||
if (offset >= format.length())
|
||||
return;
|
||||
char type = format[offset];
|
||||
if (type == '\0') return;
|
||||
temp[format_length++] = type;
|
||||
temp[format_length] = '\0';
|
||||
offset++;
|
||||
@ -128,17 +127,36 @@ void StringStream::Add(const char* format, Vector<FmtElm> elms) {
|
||||
Add(value);
|
||||
break;
|
||||
}
|
||||
case 'w': {
|
||||
ASSERT_EQ(FmtElm::LC_STR, current.type_);
|
||||
Vector<const uc16> value = *current.data_.u_lc_str_;
|
||||
for (int i = 0; i < value.length(); i++)
|
||||
Put(static_cast<char>(value[i]));
|
||||
break;
|
||||
}
|
||||
case 'o': {
|
||||
ASSERT_EQ(FmtElm::OBJ, current.type_);
|
||||
Object* obj = current.data_.u_obj_;
|
||||
PrintObject(obj);
|
||||
break;
|
||||
}
|
||||
case 'i': case 'd': case 'u': case 'x': case 'c': case 'p': {
|
||||
case 'k': {
|
||||
ASSERT_EQ(FmtElm::INT, current.type_);
|
||||
int value = current.data_.u_int_;
|
||||
if (0x20 <= value && value <= 0x7F) {
|
||||
Put(value);
|
||||
} else if (value <= 0xff) {
|
||||
Add("\\x%02x", value);
|
||||
} else {
|
||||
Add("\\u%04x", value);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'i': case 'd': case 'u': case 'x': case 'c': case 'p': case 'X': {
|
||||
int value = current.data_.u_int_;
|
||||
EmbeddedVector<char, 24> formatted;
|
||||
OS::SNPrintF(formatted, temp.start(), value);
|
||||
Add(formatted.start());
|
||||
int length = OS::SNPrintF(formatted, temp.start(), value);
|
||||
Add(Vector<const char>(formatted.start(), length));
|
||||
break;
|
||||
}
|
||||
case 'f': case 'g': case 'G': case 'e': case 'E': {
|
||||
@ -154,10 +172,8 @@ void StringStream::Add(const char* format, Vector<FmtElm> elms) {
|
||||
}
|
||||
}
|
||||
|
||||
// Verify that the buffer is 0-terminated and doesn't contain any
|
||||
// other 0-characters.
|
||||
// Verify that the buffer is 0-terminated
|
||||
ASSERT(buffer_[length_] == '\0');
|
||||
ASSERT(strlen(buffer_) == length_);
|
||||
}
|
||||
|
||||
|
||||
@ -188,6 +204,11 @@ void StringStream::PrintObject(Object* o) {
|
||||
|
||||
|
||||
void StringStream::Add(const char* format) {
|
||||
Add(CStrVector(format));
|
||||
}
|
||||
|
||||
|
||||
void StringStream::Add(Vector<const char> format) {
|
||||
Add(format, Vector<FmtElm>::empty());
|
||||
}
|
||||
|
||||
@ -195,14 +216,14 @@ void StringStream::Add(const char* format) {
|
||||
void StringStream::Add(const char* format, FmtElm arg0) {
|
||||
const char argc = 1;
|
||||
FmtElm argv[argc] = { arg0 };
|
||||
Add(format, Vector<FmtElm>(argv, argc));
|
||||
Add(CStrVector(format), Vector<FmtElm>(argv, argc));
|
||||
}
|
||||
|
||||
|
||||
void StringStream::Add(const char* format, FmtElm arg0, FmtElm arg1) {
|
||||
const char argc = 2;
|
||||
FmtElm argv[argc] = { arg0, arg1 };
|
||||
Add(format, Vector<FmtElm>(argv, argc));
|
||||
Add(CStrVector(format), Vector<FmtElm>(argv, argc));
|
||||
}
|
||||
|
||||
|
||||
@ -210,7 +231,15 @@ void StringStream::Add(const char* format, FmtElm arg0, FmtElm arg1,
|
||||
FmtElm arg2) {
|
||||
const char argc = 3;
|
||||
FmtElm argv[argc] = { arg0, arg1, arg2 };
|
||||
Add(format, Vector<FmtElm>(argv, argc));
|
||||
Add(CStrVector(format), Vector<FmtElm>(argv, argc));
|
||||
}
|
||||
|
||||
|
||||
void StringStream::Add(const char* format, FmtElm arg0, FmtElm arg1,
|
||||
FmtElm arg2, FmtElm arg3) {
|
||||
const char argc = 4;
|
||||
FmtElm argv[argc] = { arg0, arg1, arg2, arg3 };
|
||||
Add(CStrVector(format), Vector<FmtElm>(argv, argc));
|
||||
}
|
||||
|
||||
|
||||
|
@ -75,17 +75,19 @@ class FmtElm {
|
||||
FmtElm(int value) : type_(INT) { data_.u_int_ = value; } // NOLINT
|
||||
explicit FmtElm(double value) : type_(DOUBLE) { data_.u_double_ = value; } // NOLINT
|
||||
FmtElm(const char* value) : type_(C_STR) { data_.u_c_str_ = value; } // NOLINT
|
||||
FmtElm(const Vector<const uc16>& value) : type_(LC_STR) { data_.u_lc_str_ = &value; } // NOLINT
|
||||
FmtElm(Object* value) : type_(OBJ) { data_.u_obj_ = value; } // NOLINT
|
||||
FmtElm(Handle<Object> value) : type_(HANDLE) { data_.u_handle_ = value.location(); } // NOLINT
|
||||
FmtElm(void* value) : type_(INT) { data_.u_int_ = reinterpret_cast<int>(value); } // NOLINT
|
||||
private:
|
||||
friend class StringStream;
|
||||
enum Type { INT, DOUBLE, C_STR, OBJ, HANDLE };
|
||||
enum Type { INT, DOUBLE, C_STR, LC_STR, OBJ, HANDLE };
|
||||
Type type_;
|
||||
union {
|
||||
int u_int_;
|
||||
double u_double_;
|
||||
const char* u_c_str_;
|
||||
const Vector<const uc16>* u_lc_str_;
|
||||
Object* u_obj_;
|
||||
Object** u_handle_;
|
||||
} data_;
|
||||
@ -108,11 +110,17 @@ class StringStream {
|
||||
bool Put(char c);
|
||||
bool Put(String* str);
|
||||
bool Put(String* str, int start, int end);
|
||||
void Add(const char* format, Vector<FmtElm> elms);
|
||||
void Add(Vector<const char> format, Vector<FmtElm> elms);
|
||||
void Add(const char* format);
|
||||
void Add(Vector<const char> format);
|
||||
void Add(const char* format, FmtElm arg0);
|
||||
void Add(const char* format, FmtElm arg0, FmtElm arg1);
|
||||
void Add(const char* format, FmtElm arg0, FmtElm arg1, FmtElm arg2);
|
||||
void Add(const char* format,
|
||||
FmtElm arg0,
|
||||
FmtElm arg1,
|
||||
FmtElm arg2,
|
||||
FmtElm arg3);
|
||||
|
||||
// Getting the message out.
|
||||
void OutputToStdOut();
|
||||
|
@ -892,7 +892,7 @@ Object* StubCompiler::CompileCallDebugPrepareStepIn(Code::Flags flags) {
|
||||
Object* StubCompiler::GetCodeWithFlags(Code::Flags flags) {
|
||||
CodeDesc desc;
|
||||
masm_.GetCode(&desc);
|
||||
Object* result = Heap::CreateCode(desc, NULL, flags);
|
||||
Object* result = Heap::CreateCode(desc, NULL, flags, NULL);
|
||||
#ifdef DEBUG
|
||||
if (FLAG_print_code_stubs && !result->IsFailure()) {
|
||||
Code::cast(result)->Print();
|
||||
|
432
src/unicode.cc
432
src/unicode.cc
File diff suppressed because one or more lines are too long
@ -44,7 +44,7 @@ typedef unsigned char byte;
|
||||
* The max length of the result of converting the case of a single
|
||||
* character.
|
||||
*/
|
||||
static const int kMaxCaseConvertedSize = 3;
|
||||
static const int kMaxMappingSize = 4;
|
||||
|
||||
template <class T, int size = 256>
|
||||
class Predicate {
|
||||
@ -80,12 +80,13 @@ class Mapping {
|
||||
friend class Test;
|
||||
int CalculateValue(uchar c, uchar n, uchar* result);
|
||||
struct CacheEntry {
|
||||
inline CacheEntry() : code_point_(0), offset_(0) { }
|
||||
inline CacheEntry() : code_point_(kNoChar), offset_(0) { }
|
||||
inline CacheEntry(uchar code_point, signed offset)
|
||||
: code_point_(code_point),
|
||||
offset_(offset) { }
|
||||
uchar code_point_ : 21;
|
||||
signed offset_ : 11;
|
||||
uchar code_point_;
|
||||
signed offset_;
|
||||
static const int kNoChar = (1 << 21) - 1;
|
||||
};
|
||||
static const int kSize = size;
|
||||
static const int kMask = kSize - 1;
|
||||
@ -222,45 +223,15 @@ struct Letter {
|
||||
struct Space {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
struct Titlecase {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
struct Number {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
struct DecimalDigit {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
struct Ideographic {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
struct WhiteSpace {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
struct HexDigit {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
struct AsciiHexDigit {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
struct BidiControl {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
struct JoinControl {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
struct Dash {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
struct Hyphen {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
struct LineTerminator {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
struct RegExpSpecialChar {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
struct CombiningMark {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
@ -268,12 +239,35 @@ struct ConnectorPunctuation {
|
||||
static bool Is(uchar c);
|
||||
};
|
||||
struct ToLowercase {
|
||||
static const int kMaxWidth = 3;
|
||||
static int Convert(uchar c,
|
||||
uchar n,
|
||||
uchar* result,
|
||||
bool* allow_caching_ptr);
|
||||
};
|
||||
struct ToUppercase {
|
||||
static const int kMaxWidth = 3;
|
||||
static int Convert(uchar c,
|
||||
uchar n,
|
||||
uchar* result,
|
||||
bool* allow_caching_ptr);
|
||||
};
|
||||
struct Ecma262Canonicalize {
|
||||
static const int kMaxWidth = 1;
|
||||
static int Convert(uchar c,
|
||||
uchar n,
|
||||
uchar* result,
|
||||
bool* allow_caching_ptr);
|
||||
};
|
||||
struct Ecma262UnCanonicalize {
|
||||
static const int kMaxWidth = 4;
|
||||
static int Convert(uchar c,
|
||||
uchar n,
|
||||
uchar* result,
|
||||
bool* allow_caching_ptr);
|
||||
};
|
||||
struct CanonicalizationRange {
|
||||
static const int kMaxWidth = 1;
|
||||
static int Convert(uchar c,
|
||||
uchar n,
|
||||
uchar* result,
|
||||
|
94
src/utils.h
94
src/utils.h
@ -83,6 +83,23 @@ static inline T RoundUp(T x, int m) {
|
||||
}
|
||||
|
||||
|
||||
// Three-way comparison: returns 0 when a == b, -1 when a < b and 1
// otherwise. Only operator== and operator< are required of T.
template <typename T>
static int Spaceship(const T& a, const T& b) {
  if (a == b) return 0;
  return (a < b) ? -1 : 1;
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
static int PointerSpaceship(const T* a, const T* b) {
|
||||
return Spaceship<T>(*a, *b);
|
||||
}
|
||||
|
||||
|
||||
// Returns the smallest power of two which is >= x. If you pass in a
|
||||
// number that is already a power of two, it is returned as is.
|
||||
uint32_t RoundUpToPowerOf2(uint32_t x);
|
||||
@ -283,6 +300,15 @@ class Vector {
|
||||
return Vector<T>(NewArray<T>(length), length);
|
||||
}
|
||||
|
||||
// Returns a vector using the same backing storage as this one,
|
||||
// spanning from and including 'from', to but not including 'to'.
|
||||
Vector<T> SubVector(int from, int to) {
|
||||
ASSERT(from < length_);
|
||||
ASSERT(to <= length_);
|
||||
ASSERT(from < to);
|
||||
return Vector<T>(start() + from, to - from);
|
||||
}
|
||||
|
||||
// Returns the length of the vector.
|
||||
int length() const { return length_; }
|
||||
|
||||
@ -298,6 +324,10 @@ class Vector {
|
||||
return start_[index];
|
||||
}
|
||||
|
||||
// Reference to the element at index 0. No emptiness check is performed.
T& first() { return *start_; }
|
||||
|
||||
// Reference to the element at index length_ - 1. No emptiness check is
// performed.
T& last() { return *(start_ + (length_ - 1)); }
|
||||
|
||||
// Returns a clone of this vector with a new backing store.
|
||||
Vector<T> Clone() const {
|
||||
T* result = NewArray<T>(length_);
|
||||
@ -305,6 +335,18 @@ class Vector {
|
||||
return Vector<T>(result, length_);
|
||||
}
|
||||
|
||||
// Sorts the elements in place with qsort, ordering them with 'cmp'.
// The typed comparator is cast to the untyped signature qsort expects.
void Sort(int (*cmp)(const T*, const T*)) {
  typedef int (*RawComparer)(const void*, const void*);
  RawComparer untyped_cmp = reinterpret_cast<RawComparer>(cmp);
  qsort(start(), length(), sizeof(T), untyped_cmp);
}
|
||||
|
||||
void Sort() {
|
||||
Sort(PointerSpaceship<T>);
|
||||
}
|
||||
|
||||
// Releases the array underlying this vector. Once disposed the
|
||||
// vector is empty.
|
||||
void Dispose() {
|
||||
@ -465,6 +507,58 @@ static inline void CopyChars(sinkchar* dest, const sourcechar* src, int chars) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Reads a 16-bit value starting at pc.
// With CAN_READ_UNALIGNED the two bytes are read directly as a uint16_t, in
// the host's byte order; otherwise they are combined with pc[0] as the high
// byte. The two variants therefore agree only on big-endian hosts.
static inline int Load16(const byte* pc) {
#ifdef CAN_READ_UNALIGNED
  const uint16_t* unaligned = reinterpret_cast<const uint16_t*>(pc);
  return *unaligned;
#else
  uint32_t combined = (pc[0] << 8) | pc[1];
  return combined;
#endif
}
|
||||
|
||||
|
||||
// Reads a 32-bit value starting at pc.
// With CAN_READ_UNALIGNED the four bytes are read directly as a uint32_t, in
// the host's byte order; otherwise they are combined with pc[0] as the most
// significant byte. The two variants therefore agree only on big-endian
// hosts.
static inline int Load32(const byte* pc) {
#ifdef CAN_READ_UNALIGNED
  return *reinterpret_cast<const uint32_t*>(pc);
#else
  // Convert each byte to uint32_t before shifting: a byte promotes to a
  // signed int, and shifting a value >= 0x80 left by 24 overflows it.
  uint32_t word = static_cast<uint32_t>(pc[3]);
  word |= static_cast<uint32_t>(pc[2]) << 8;
  word |= static_cast<uint32_t>(pc[1]) << 16;
  word |= static_cast<uint32_t>(pc[0]) << 24;
  return word;
#endif
}
|
||||
|
||||
|
||||
// Writes a 16-bit value starting at pc.
// With CAN_READ_UNALIGNED the value is written directly as a uint16_t, in
// the host's byte order; otherwise the high byte goes to pc[0] and the low
// byte to pc[1].
static inline void Store16(byte* pc, uint16_t value) {
#ifdef CAN_READ_UNALIGNED
  uint16_t* unaligned = reinterpret_cast<uint16_t*>(pc);
  *unaligned = value;
#else
  pc[0] = value >> 8;
  pc[1] = value;
#endif
}
|
||||
|
||||
|
||||
// Writes a 32-bit value starting at pc.
// With CAN_READ_UNALIGNED the value is written directly as a uint32_t, in
// the host's byte order; otherwise the most significant byte goes to pc[0]
// and the least significant byte to pc[3].
static inline void Store32(byte* pc, uint32_t value) {
#ifdef CAN_READ_UNALIGNED
  uint32_t* unaligned = reinterpret_cast<uint32_t*>(pc);
  *unaligned = value;
#else
  pc[0] = value >> 24;
  pc[1] = value >> 16;
  pc[2] = value >> 8;
  pc[3] = value;
#endif
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
} } // namespace v8::internal
|
||||
|
||||
#endif // V8_UTILS_H_
|
||||
|
@ -38,7 +38,7 @@ SOURCES = {
|
||||
'test-ast.cc', 'test-heap.cc', 'test-utils.cc', 'test-compiler.cc',
|
||||
'test-spaces.cc', 'test-mark-compact.cc', 'test-lock.cc',
|
||||
'test-conversions.cc', 'test-strings.cc', 'test-serialize.cc',
|
||||
'test-decls.cc', 'test-alloc.cc'
|
||||
'test-decls.cc', 'test-alloc.cc', 'test-regexp.cc'
|
||||
],
|
||||
'arch:arm': ['test-assembler-arm.cc', 'test-disasm-arm.cc'],
|
||||
'arch:ia32': ['test-assembler-ia32.cc', 'test-disasm-ia32.cc'],
|
||||
|
922
test/cctest/test-regexp.cc
Normal file
922
test/cctest/test-regexp.cc
Normal file
@ -0,0 +1,922 @@
|
||||
// Copyright 2006-2008 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <set>
|
||||
|
||||
#include "v8.h"
|
||||
|
||||
#include "cctest.h"
|
||||
#include "zone-inl.h"
|
||||
#include "parser.h"
|
||||
#include "ast.h"
|
||||
#include "jsregexp-inl.h"
|
||||
#include "assembler-irregexp.h"
|
||||
#include "regexp-macro-assembler.h"
|
||||
#include "regexp-macro-assembler-irregexp.h"
|
||||
#include "regexp-macro-assembler-ia32.h"
|
||||
#include "interpreter-irregexp.h"
|
||||
|
||||
|
||||
using namespace v8::internal;
|
||||
|
||||
|
||||
// Parses |input| as a regular expression, requires that parsing succeeds
// with a non-NULL tree and no error, and returns the tree's ToString() form.
static SmartPointer<const char> Parse(const char* input) {
  v8::HandleScope handle_scope;
  ZoneScope zone_scope(DELETE_ON_EXIT);
  FlatStringReader pattern(CStrVector(input));
  RegExpParseResult parsed;
  CHECK(v8::internal::ParseRegExp(&pattern, &parsed));
  CHECK(parsed.tree != NULL);
  CHECK(parsed.error.is_null());
  SmartPointer<const char> printed = parsed.tree->ToString();
  return printed;
}
|
||||
|
||||
// Parses |input| as a regular expression, requires that parsing succeeds
// with a non-NULL tree and no error, and returns the parse result's
// has_character_escapes flag.
static bool ParseEscapes(const char* input) {
  v8::HandleScope scope;
  ZoneScope zone_scope(DELETE_ON_EXIT);
  // The pattern is read through a FlatStringReader only; the unused
  // Utf8InputBuffer local that used to be constructed here has been dropped.
  FlatStringReader reader(CStrVector(input));
  RegExpParseResult result;
  CHECK(v8::internal::ParseRegExp(&reader, &result));
  CHECK(result.tree != NULL);
  CHECK(result.error.is_null());
  return result.has_character_escapes;
}
|
||||
|
||||
|
||||
// Checks that the printed syntax tree for |input| equals |expected|.
#define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input))

// Checks that parsing |input| reports |has_escapes| for character escapes.
// The macro deliberately does not end in a semicolon: the caller supplies
// it, so the expansion is a single statement that is safe in if/else bodies.
#define CHECK_ESCAPES(input, has_escapes) \
  CHECK_EQ(has_escapes, ParseEscapes(input))
|
||||
|
||||
// Exercises the regexp parser: each CHECK_PARSE_EQ compares the printed
// syntax tree of a pattern against its expected textual form, and each
// CHECK_ESCAPES checks the has_character_escapes flag of the parse result.
TEST(Parser) {
  V8::Initialize(NULL);

  // Atoms, alternation, assertions and built-in classes.
  CHECK_PARSE_EQ("abc", "'abc'");
  CHECK_PARSE_EQ("", "%");
  CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')");
  CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
  CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)");
  CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
  CHECK_PARSE_EQ("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");

  // Quantifiers, greedy (g) and non-greedy (n).
  CHECK_PARSE_EQ("a*", "(# 0 - g 'a')");
  CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')");
  CHECK_PARSE_EQ("abc+", "(: 'ab' (# 1 - g 'c'))");
  CHECK_PARSE_EQ("abc+?", "(: 'ab' (# 1 - n 'c'))");
  CHECK_PARSE_EQ("xyz?", "(: 'xy' (# 0 1 g 'z'))");
  CHECK_PARSE_EQ("xyz??", "(: 'xy' (# 0 1 n 'z'))");
  CHECK_PARSE_EQ("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
  CHECK_PARSE_EQ("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
  CHECK_PARSE_EQ("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
  CHECK_PARSE_EQ("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
  CHECK_PARSE_EQ("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
  CHECK_PARSE_EQ("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
  CHECK_PARSE_EQ("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
  CHECK_PARSE_EQ("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");

  // Control escapes.
  CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
  CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");

  // Groups, captures and lookaheads.
  CHECK_PARSE_EQ("(?:foo)", "'foo'");
  CHECK_PARSE_EQ("(?: foo )", "' foo '");
  CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
  CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
  CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
  CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
  CHECK_PARSE_EQ("()", "(^ %)");
  CHECK_PARSE_EQ("(?=)", "(-> + %)");

  // Character classes.
  CHECK_PARSE_EQ("[]", "^[\\x00-\\uffff]");   // Doesn't compile on windows
  CHECK_PARSE_EQ("[^]", "[\\x00-\\uffff]");   // \uffff isn't in codepage 1252
  CHECK_PARSE_EQ("[x]", "[x]");
  CHECK_PARSE_EQ("[xyz]", "[x y z]");
  CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
  CHECK_PARSE_EQ("[-123]", "[- 1 2 3]");
  CHECK_PARSE_EQ("[^123]", "^[1 2 3]");
  CHECK_PARSE_EQ("]", "']'");
  CHECK_PARSE_EQ("}", "'}'");
  CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]");
  CHECK_PARSE_EQ("[\\d]", "[0-9]");
  CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]");
  CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]");
  CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]");

  // \c control letters and identity escapes.
  CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK",
                 "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
  CHECK_PARSE_EQ("\\c!", "'c!'");
  CHECK_PARSE_EQ("\\c_", "'c_'");
  CHECK_PARSE_EQ("\\c~", "'c~'");
  CHECK_PARSE_EQ("[a\\]c]", "[a ] c]");
  CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
  CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");

  // Decimal and octal escapes outside classes.
  CHECK_PARSE_EQ("\\0", "'\\x00'");
  CHECK_PARSE_EQ("\\8", "'8'");
  CHECK_PARSE_EQ("\\9", "'9'");
  CHECK_PARSE_EQ("\\11", "'\\x09'");
  CHECK_PARSE_EQ("\\11a", "'\\x09a'");
  CHECK_PARSE_EQ("\\011", "'\\x09'");
  CHECK_PARSE_EQ("\\00011", "'\\x0011'");
  CHECK_PARSE_EQ("\\118", "'\\x098'");
  CHECK_PARSE_EQ("\\111", "'I'");
  CHECK_PARSE_EQ("\\1111", "'I1'");

  // Back references versus octal escapes, depending on capture count.
  CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
  CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
  CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
  CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
  CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')"
                                  " (# 0 - g (<- 1)))");
  CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')"
                                  " (# 0 - g (<- 2)))");
  CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')"
                                  " (# 0 - g (<- 3)))");
  CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')"
                                  " (# 0 - g '\\x04'))");
  CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
                 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
                 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
  CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
                 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
                 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
  CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))");
  CHECK_PARSE_EQ("(a\\1)", "(^ 'a')");
  CHECK_PARSE_EQ("(\\1a)", "(^ 'a')");
  CHECK_PARSE_EQ("\\1(a)", "(^ 'a')");
  CHECK_PARSE_EQ("(?!(a))\\1", "(-> - (^ 'a'))");
  CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(-> - (: (^ 'a') (<- 1)))");

  // Decimal and octal escapes inside classes.
  CHECK_PARSE_EQ("[\\0]", "[\\x00]");
  CHECK_PARSE_EQ("[\\11]", "[\\x09]");
  CHECK_PARSE_EQ("[\\11a]", "[\\x09 a]");
  CHECK_PARSE_EQ("[\\011]", "[\\x09]");
  CHECK_PARSE_EQ("[\\00011]", "[\\x00 1 1]");
  CHECK_PARSE_EQ("[\\118]", "[\\x09 8]");
  CHECK_PARSE_EQ("[\\111]", "[I]");
  CHECK_PARSE_EQ("[\\1111]", "[I 1]");

  // Hex and unicode escapes, including incomplete ones.
  CHECK_PARSE_EQ("\\x34", "'\x34'");
  CHECK_PARSE_EQ("\\x60", "'\x60'");
  CHECK_PARSE_EQ("\\x3z", "'x3z'");
  CHECK_PARSE_EQ("\\u0034", "'\x34'");
  CHECK_PARSE_EQ("\\u003z", "'u003z'");
  CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))");

  // has_character_escapes flag.
  CHECK_ESCAPES("a", false);
  CHECK_ESCAPES("a|b", false);
  CHECK_ESCAPES("a\\n", true);
  CHECK_ESCAPES("^a", false);
  CHECK_ESCAPES("a$", false);
  CHECK_ESCAPES("a\\b!", false);
  CHECK_ESCAPES("a\\Bb", false);
  CHECK_ESCAPES("a*", false);
  CHECK_ESCAPES("a*?", false);
  CHECK_ESCAPES("a?", false);
  CHECK_ESCAPES("a??", false);
  CHECK_ESCAPES("a{0,1}?", false);
  CHECK_ESCAPES("a{1,1}?", false);
  CHECK_ESCAPES("a{1,2}?", false);
  CHECK_ESCAPES("a+?", false);
  CHECK_ESCAPES("(a)", false);
  CHECK_ESCAPES("(a)\\1", false);
  CHECK_ESCAPES("(\\1a)", false);
  CHECK_ESCAPES("\\1(a)", false);
  CHECK_ESCAPES("a\\s", false);
  CHECK_ESCAPES("a\\S", false);
  CHECK_ESCAPES("a\\d", false);
  CHECK_ESCAPES("a\\D", false);
  CHECK_ESCAPES("a\\w", false);
  CHECK_ESCAPES("a\\W", false);
  CHECK_ESCAPES("a.", false);
  CHECK_ESCAPES("a\\q", true);
  CHECK_ESCAPES("a[a]", false);
  CHECK_ESCAPES("a[^a]", false);
  CHECK_ESCAPES("a[a-z]", false);
  CHECK_ESCAPES("a[\\q]", false);
  CHECK_ESCAPES("a(?:b)", false);
  CHECK_ESCAPES("a(?=b)", false);
  CHECK_ESCAPES("a(?!b)", false);
  CHECK_ESCAPES("\\x60", true);
  CHECK_ESCAPES("\\u0060", true);
  CHECK_ESCAPES("\\cA", true);
  CHECK_ESCAPES("\\q", true);
  CHECK_ESCAPES("\\1112", true);
  CHECK_ESCAPES("\\0", true);
  CHECK_ESCAPES("(a)\\1", false);

  // Malformed quantifier braces are expected to parse as literal text.
  CHECK_PARSE_EQ("a{}", "'a{}'");
  CHECK_PARSE_EQ("a{,}", "'a{,}'");
  CHECK_PARSE_EQ("a{", "'a{'");
  CHECK_PARSE_EQ("a{z}", "'a{z}'");
  CHECK_PARSE_EQ("a{1z}", "'a{1z}'");
  CHECK_PARSE_EQ("a{12z}", "'a{12z}'");
  CHECK_PARSE_EQ("a{12,", "'a{12,'");
  CHECK_PARSE_EQ("a{12,3b", "'a{12,3b'");
  CHECK_PARSE_EQ("{}", "'{}'");
  CHECK_PARSE_EQ("{,}", "'{,}'");
  CHECK_PARSE_EQ("{", "'{'");
  CHECK_PARSE_EQ("{z}", "'{z}'");
  CHECK_PARSE_EQ("{1z}", "'{1z}'");
  CHECK_PARSE_EQ("{12z}", "'{12z}'");
  CHECK_PARSE_EQ("{12,", "'{12,'");
  CHECK_PARSE_EQ("{12,3b", "'{12,3b'");
}
|
||||
|
||||
// Parser inputs that are kept as regression cases.
TEST(ParserRegression) {
  // Initialize V8 here as the other parser tests do, so this test does not
  // depend on another test having run first.
  V8::Initialize(NULL);
  CHECK_PARSE_EQ("[A-Z$-][x]", "(! [A-Z $ -] [x])");
  CHECK_PARSE_EQ("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
  CHECK_PARSE_EQ("{", "'{'");
  CHECK_PARSE_EQ("a|", "(| 'a' %)");
}
|
||||
|
||||
static void ExpectError(const char* input,
|
||||
const char* expected) {
|
||||
v8::HandleScope scope;
|
||||
ZoneScope zone_scope(DELETE_ON_EXIT);
|
||||
FlatStringReader reader(CStrVector(input));
|
||||
RegExpParseResult result;
|
||||
CHECK_EQ(false, v8::internal::ParseRegExp(&reader, &result));
|
||||
CHECK(result.tree == NULL);
|
||||
CHECK(!result.error.is_null());
|
||||
SmartPointer<char> str = result.error->ToCString(ALLOW_NULLS);
|
||||
CHECK_EQ(expected, *str);
|
||||
}
|
||||
|
||||
|
||||
// Checks the error message reported for a set of malformed patterns.
TEST(Errors) {
  V8::Initialize(NULL);

  // Messages the parser is expected to report.
  const char* kEndBackslash = "\\ at end of pattern";
  const char* kUnterminatedGroup = "Unterminated group";
  const char* kInvalidGroup = "Invalid group";
  const char* kUnterminatedCharacterClass = "Unterminated character class";
  const char* kIllegalCharacterClass = "Illegal character class";
  const char* kEndControl = "\\c at end of pattern";
  const char* kNothingToRepeat = "Nothing to repeat";

  ExpectError("\\", kEndBackslash);
  ExpectError("(foo", kUnterminatedGroup);
  ExpectError("(?", kInvalidGroup);
  ExpectError("[", kUnterminatedCharacterClass);
  ExpectError("[a-", kUnterminatedCharacterClass);
  ExpectError("[a-\\w]", kIllegalCharacterClass);
  ExpectError("\\c", kEndControl);

  // Quantifiers with nothing in front of them, checked in order.
  const char* const dangling_quantifiers[] = {
    "*", "?", "+", "{1}", "{1,2}", "{1,}"
  };
  const int dangling_count =
      sizeof(dangling_quantifiers) / sizeof(dangling_quantifiers[0]);
  for (int n = 0; n < dangling_count; n++) {
    ExpectError(dangling_quantifiers[n], kNothingToRepeat);
  }
}
|
||||
|
||||
|
||||
// Reference predicate for \d: true for the characters '0' through '9'.
static bool IsDigit(uc16 c) {
  if (c < '0') return false;
  return c <= '9';
}
|
||||
|
||||
|
||||
// Reference predicate for \D: the complement of IsDigit.
static bool NotDigit(uc16 c) {
  const bool digit = IsDigit(c);
  return !digit;
}
|
||||
|
||||
|
||||
// Reference predicate for \s: true for 0x09-0x0D, 0x20, 0xA0, 0x2028 and
// 0x2029; any other character is decided by unibrow::Space::Is.
static bool IsWhiteSpace(uc16 c) {
  const bool listed =
      (0x09 <= c && c <= 0x0D) ||
      c == 0x20 ||
      c == 0xA0 ||
      c == 0x2028 ||
      c == 0x2029;
  // Short-circuit keeps unibrow from being consulted for the listed values.
  return listed || unibrow::Space::Is(c);
}
|
||||
|
||||
|
||||
// Reference predicate for \S: the complement of IsWhiteSpace.
static bool NotWhiteSpace(uc16 c) {
  const bool space = IsWhiteSpace(c);
  return !space;
}
|
||||
|
||||
|
||||
// Reference predicate for \w: ASCII letters, ASCII digits and underscore.
static bool IsWord(uc16 c) {
  if ('a' <= c && c <= 'z') return true;
  if ('A' <= c && c <= 'Z') return true;
  if ('0' <= c && c <= '9') return true;
  return c == '_';
}
|
||||
|
||||
|
||||
// Reference predicate for \W: the complement of IsWord.
static bool NotWord(uc16 c) {
  const bool word = IsWord(c);
  return !word;
}
|
||||
|
||||
|
||||
// Reference predicate for '.': true for every character except the four
// line terminators LF (0x000A), CR (0x000D), LS (0x2028) and PS (0x2029).
static bool Dot(uc16 c) {
  const bool line_terminator =
      c == 0x000A || c == 0x000D || c == 0x2028 || c == 0x2029;
  return !line_terminator;
}
|
||||
|
||||
|
||||
// Builds the range list that CharacterRange::AddClassEscape produces for the
// class escape |c| and checks, for every value below 1 << 16, that membership
// in those ranges agrees with the reference predicate |pred|.
static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
  ZoneScope scope(DELETE_ON_EXIT);
  ZoneList<CharacterRange>* escape_ranges = new ZoneList<CharacterRange>(2);
  CharacterRange::AddClassEscape(c, escape_ranges);
  for (unsigned code = 0; code < (1 << 16); code++) {
    bool covered = false;
    for (int r = 0; r < escape_ranges->length(); r++) {
      CharacterRange& candidate = escape_ranges->at(r);
      if (candidate.from() <= code && code <= candidate.to()) {
        covered = true;
        break;
      }
    }
    CHECK_EQ(pred(code), covered);
  }
}
|
||||
|
||||
|
||||
// Checks each supported class escape against its reference predicate.
TEST(CharacterClassEscapes) {
  // Any character except line terminators.
  TestCharacterClassEscapes('.', Dot);
  // Digits and their complement.
  TestCharacterClassEscapes('d', IsDigit);
  TestCharacterClassEscapes('D', NotDigit);
  // White space and its complement.
  TestCharacterClassEscapes('s', IsWhiteSpace);
  TestCharacterClassEscapes('S', NotWhiteSpace);
  // Word characters and their complement.
  TestCharacterClassEscapes('w', IsWord);
  TestCharacterClassEscapes('W', NotWord);
}
|
||||
|
||||
|
||||
// Parses |input| and hands the result to RegExpEngine::Compile. Returns the
// node that Compile stored, or NULL when parsing fails (or when Compile
// leaves the output untouched).
static RegExpNode* Compile(const char* input) {
  FlatStringReader pattern(CStrVector(input));
  RegExpParseResult parsed;
  RegExpNode* graph = NULL;
  if (v8::internal::ParseRegExp(&pattern, &parsed)) {
    RegExpEngine::Compile(&parsed, &graph, false);
  }
  return graph;
}
|
||||
|
||||
|
||||
static void Execute(const char* input,
|
||||
const char* str,
|
||||
bool dot_output = false) {
|
||||
v8::HandleScope scope;
|
||||
ZoneScope zone_scope(DELETE_ON_EXIT);
|
||||
RegExpNode* node = Compile(input);
|
||||
USE(node);
|
||||
#ifdef DEBUG
|
||||
if (dot_output) {
|
||||
RegExpEngine::DotPrint(input, node);
|
||||
exit(0);
|
||||
}
|
||||
#endif // DEBUG
|
||||
}
|
||||
|
||||
|
||||
// Runs Execute on one pattern with three different subject strings.
TEST(Execution) {
  V8::Initialize(NULL);
  const char* const pattern = ".*?(?:a[bc]d|e[fg]h)";
  Execute(pattern, "xxxabbegh");
  Execute(pattern, "xxxabbefh");
  Execute(pattern, "xxxabbefd");
}
|
||||
|
||||
|
||||
// Splay tree configuration used by the tests below: int keys, int values,
// and a three-way integer comparison.
class TestConfig {
 public:
  typedef int Key;
  typedef int Value;
  static const int kNoKey;
  static const int kNoValue;
  // Returns 0 when a == b, -1 when a < b and 1 when a > b.
  static inline int Compare(int a, int b) {
    if (a == b) return 0;
    return (a < b) ? -1 : 1;
  }
};


const int TestConfig::kNoKey = 0;
const int TestConfig::kNoValue = 0;
|
||||
|
||||
|
||||
// Deterministic mixing of two ints used to generate test data:
// (i * 781) xor (j * 329).
static int PseudoRandom(int i, int j) {
  const int left = i * 781;
  const int right = j * 329;
  // A double bitwise complement is the identity, so the xor is the result.
  return left ^ right;
}
|
||||
|
||||
|
||||
// Drives a ZoneSplayTree<TestConfig> with pseudo-random inserts and removals
// and compares it against a std::set<int> that mirrors the expected keys.
// Every inserted key k is given the value 3 * k.
TEST(SplayTreeSimple) {
  static const int kLimit = 1000;
  ZoneScope zone_scope(DELETE_ON_EXIT);
  ZoneSplayTree<TestConfig> tree;
  std::set<int> seen;
  // Checks that the tree and the reference set agree on which keys in
  // [0, kLimit) are present. Uses the |loc| that is in scope at the call site.
#define CHECK_MAPS_EQUAL() do {                                      \
    for (int k = 0; k < kLimit; k++)                                 \
      CHECK_EQ(seen.find(k) != seen.end(), tree.Find(k, &loc));      \
  } while (false)
  for (int i = 0; i < 50; i++) {
    for (int j = 0; j < 50; j++) {
      int next = PseudoRandom(i, j) % kLimit;
      if (seen.find(next) != seen.end()) {
        // We've already seen this one.  Check the value and remove
        // it.
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(tree.Find(next, &loc));
        CHECK_EQ(next, loc.key());
        CHECK_EQ(3 * next, loc.value());
        tree.Remove(next);
        seen.erase(next);
        CHECK_MAPS_EQUAL();
      } else {
        // Check that it wasn't there already and then add it.
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(!tree.Find(next, &loc));
        CHECK(tree.Insert(next, &loc));
        CHECK_EQ(next, loc.key());
        loc.set_value(3 * next);
        seen.insert(next);
        CHECK_MAPS_EQUAL();
      }
      // The original code wrapped each of the two checks below in a loop
      // whose index was never used, so every iteration repeated the same
      // test on |val|.  A single test performs exactly the same checks
      // without the redundant iterations.
      int val = PseudoRandom(j, i) % kLimit;
      if (seen.find(val) != seen.end()) {
        // A key that is present must be found as its own bound.
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(tree.FindGreatestLessThan(val, &loc));
        CHECK_EQ(loc.key(), val);
      }
      // |val| may be negative here; such values are never in |seen|.
      val = PseudoRandom(i + j, i - j) % kLimit;
      if (seen.find(val) != seen.end()) {
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(tree.FindLeastGreaterThan(val, &loc));
        CHECK_EQ(loc.key(), val);
      }
    }
  }
  // Keep the helper macro local to this test.
#undef CHECK_MAPS_EQUAL
}
|
||||
|
||||
|
||||
// Fills a DispatchTable with kRangeCount groups of pseudo-random ranges and
// checks, for every position below kLimit, that the out-set for that
// position contains exactly the groups with a range covering it.
TEST(DispatchTableConstruction) {
  static const int kLimit = 1000;
  static const int kRangeCount = 8;
  static const int kRangeSize = 16;
  // Each group holds kRangeSize ranges stored as (from, to) pairs.
  static const int kBoundaryCount = 2 * kRangeSize;

  // Generate sorted boundary values for each group.
  uc16 ranges[kRangeCount][kBoundaryCount];
  for (int group = 0; group < kRangeCount; group++) {
    Vector<uc16> boundaries(ranges[group], kBoundaryCount);
    for (int b = 0; b < kBoundaryCount; b++) {
      boundaries[b] = PseudoRandom(group + 25, b + 87) % kLimit;
    }
    boundaries.Sort();
    for (int b = 1; b < kBoundaryCount; b++) {
      CHECK(boundaries[b - 1] <= boundaries[b]);
    }
  }

  // Enter consecutive boundary pairs into the table, tagged with the group.
  ZoneScope zone_scope(DELETE_ON_EXIT);
  DispatchTable table;
  for (int group = 0; group < kRangeCount; group++) {
    uc16* boundaries = ranges[group];
    for (int b = 0; b < kBoundaryCount; b += 2) {
      table.AddRange(CharacterRange(boundaries[b], boundaries[b + 1]), group);
    }
  }

  // Compare the table against a brute-force membership computation.
  for (int p = 0; p < kLimit; p++) {
    OutSet* outs = table.Get(p);
    for (int group = 0; group < kRangeCount; group++) {
      uc16* boundaries = ranges[group];
      bool expected_on = false;
      for (int b = 0; b < kBoundaryCount; b += 2) {
        if (boundaries[b] <= p && p <= boundaries[b + 1]) {
          expected_on = true;
          break;
        }
      }
      CHECK_EQ(expected_on, outs->Get(group));
    }
  }
}
|
||||
|
||||
|
||||
// Hand-assembles a bytecode program that searches for "foo" anywhere in the
// subject and records the match start in register 0 and the current position
// plus 2 in register 1, then runs it through IrregexpInterpreter::Match.
TEST(Assembler) {
  V8::Initialize(NULL);
  byte codes[1024];
  IrregexpAssembler assembler(Vector<byte>(codes, 1024));
#define __ assembler.
  Label advance;
  Label look_for_foo;
  Label fail;
  __ GoTo(&look_for_foo);
  // Move one character forward and retry.
  __ Bind(&advance);
  __ AdvanceCP(1);
  __ Bind(&look_for_foo);
  __ LoadCurrentChar(0, &fail);
  __ CheckNotCharacter('f', &advance);
  __ LoadCurrentChar(1, &fail);
  __ CheckNotCharacter('o', &advance);
  __ LoadCurrentChar(2, &fail);
  __ CheckNotCharacter('o', &advance);
  __ WriteCurrentPositionToRegister(0);
  __ WriteCurrentPositionToRegister(1, 2);
  __ Succeed();
  __ Bind(&fail);
  __ Fail();
  // The shorthand is only needed while emitting code; do not let it leak
  // into the rest of the file.
#undef __

  v8::HandleScope scope;
  Handle<ByteArray> array = Factory::NewByteArray(assembler.length());
  assembler.Copy(array->GetDataStartAddress());
  int captures[2];

  // No "foo" in the subject.
  Handle<String> f1 =
      Factory::NewStringFromAscii(CStrVector("Now is the time"));
  Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1);
  CHECK(!IrregexpInterpreter::Match(array, f1_16, captures, 0));

  // "foo" at the start.
  Handle<String> f2 = Factory::NewStringFromAscii(CStrVector("foo bar baz"));
  Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2);
  CHECK(IrregexpInterpreter::Match(array, f2_16, captures, 0));
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(2, captures[1]);

  // "foo" in the middle.
  Handle<String> f3 = Factory::NewStringFromAscii(CStrVector("tomfoolery"));
  Handle<String> f3_16 = RegExpImpl::StringToTwoByte(f3);
  CHECK(IrregexpInterpreter::Match(array, f3_16, captures, 0));
  CHECK_EQ(3, captures[0]);
  CHECK_EQ(5, captures[1]);
}
|
||||
|
||||
|
||||
// Hand-assembles a backtracking bytecode program for /^.*foo/ and runs it
// through IrregexpInterpreter::Match on several subjects.
TEST(Assembler2) {
  V8::Initialize(NULL);
  byte codes[1024];
  IrregexpAssembler assembler(Vector<byte>(codes, 1024));
#define __ assembler.
  // /^.*foo/
  Label more_dots;
  Label unwind_dot;
  Label failure;
  Label foo;
  Label foo_failed;
  Label dot_match;
  // ^
  __ PushCurrentPosition();
  __ PushRegister(0);
  __ WriteCurrentPositionToRegister(0);
  __ PushBacktrack(&failure);
  __ GoTo(&dot_match);
  // .*
  __ Bind(&more_dots);
  __ AdvanceCP(1);
  __ Bind(&dot_match);
  __ PushCurrentPosition();
  __ PushBacktrack(&unwind_dot);
  __ LoadCurrentChar(0, &foo);
  __ CheckNotCharacter('\n', &more_dots);
  // foo
  __ Bind(&foo);
  __ CheckNotCharacter('f', &foo_failed);
  __ LoadCurrentChar(1, &foo_failed);
  __ CheckNotCharacter('o', &foo_failed);
  __ LoadCurrentChar(2, &foo_failed);
  __ CheckNotCharacter('o', &foo_failed);
  __ WriteCurrentPositionToRegister(1, 2);
  __ Succeed();
  __ Break();

  __ Bind(&foo_failed);
  __ PopBacktrack();
  __ Break();

  __ Bind(&unwind_dot);
  __ PopCurrentPosition();
  __ LoadCurrentChar(0, &foo_failed);
  __ GoTo(&foo);

  __ Bind(&failure);
  __ PopRegister(0);
  __ PopCurrentPosition();
  __ Fail();
  // The shorthand is only needed while emitting code; do not let it leak
  // into the rest of the file.
#undef __

  v8::HandleScope scope;
  Handle<ByteArray> array = Factory::NewByteArray(assembler.length());
  assembler.Copy(array->GetDataStartAddress());
  int captures[2];

  Handle<String> f1 =
      Factory::NewStringFromAscii(CStrVector("Now is the time"));
  Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1);
  CHECK(!IrregexpInterpreter::Match(array, f1_16, captures, 0));

  Handle<String> f2 = Factory::NewStringFromAscii(CStrVector("foo bar baz"));
  Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2);
  CHECK(IrregexpInterpreter::Match(array, f2_16, captures, 0));
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(2, captures[1]);

  Handle<String> f3 = Factory::NewStringFromAscii(CStrVector("tomfoolery"));
  Handle<String> f3_16 = RegExpImpl::StringToTwoByte(f3);
  CHECK(IrregexpInterpreter::Match(array, f3_16, captures, 0));
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(5, captures[1]);

  // Greedy .* picks the last "foo".
  Handle<String> f4 =
      Factory::NewStringFromAscii(CStrVector("football buffoonery"));
  Handle<String> f4_16 = RegExpImpl::StringToTwoByte(f4);
  CHECK(IrregexpInterpreter::Match(array, f4_16, captures, 0));
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(14, captures[1]);

  // The only "foo" is after a newline, which the dot loop does not cross.
  Handle<String> f5 =
      Factory::NewStringFromAscii(CStrVector("walking\nbarefoot"));
  Handle<String> f5_16 = RegExpImpl::StringToTwoByte(f5);
  CHECK(!IrregexpInterpreter::Match(array, f5_16, captures, 0));
}
|
||||
|
||||
|
||||
// Emits a program for ^f(o)o. through RegExpMacroAssemblerIrregexp and runs
// it with IrregexpInterpreter::Match on a matching and a non-matching
// subject, checking the resulting register contents.
TEST(MacroAssembler) {
  V8::Initialize(NULL);
  byte codes[1024];
  IrregexpAssembler assembler(Vector<byte>(codes, 1024));
  RegExpMacroAssemblerIrregexp m(&assembler);
  // ^f(o)o.
  Label start;
  Label fail;
  Label fail2;
  uc16 foo_chars[3] = { 'f', 'o', 'o' };
  Vector<const uc16> foo(foo_chars, 3);

  // Register 4: set to 42, saved on the stack, then advanced by 42.
  m.SetRegister(4, 42);
  m.PushRegister(4);
  m.AdvanceRegister(4, 42);
  m.GoTo(&start);
  m.Fail();

  m.Bind(&start);
  m.PushBacktrack(&fail2);
  m.CheckCharacters(foo, 0, &fail);
  m.WriteCurrentPositionToRegister(0);
  m.PushCurrentPosition();
  m.AdvanceCurrentPosition(3);
  m.WriteCurrentPositionToRegister(1);
  m.PopCurrentPosition();
  m.AdvanceCurrentPosition(1);
  m.WriteCurrentPositionToRegister(2);
  m.AdvanceCurrentPosition(1);
  m.WriteCurrentPositionToRegister(3);
  m.Succeed();

  m.Bind(&fail);
  m.Backtrack();
  m.Succeed();

  // Backtrack target: pops the value pushed from register 4 into register 0.
  m.Bind(&fail2);
  m.PopRegister(0);
  m.Fail();

  v8::HandleScope scope;

  Handle<ByteArray> array = Factory::NewByteArray(assembler.length());
  assembler.Copy(array->GetDataStartAddress());
  int captures[5];

  Handle<String> matching =
      Factory::NewStringFromAscii(CStrVector("foobar"));
  Handle<String> matching_16 = RegExpImpl::StringToTwoByte(matching);
  CHECK(IrregexpInterpreter::Match(array, matching_16, captures, 0));
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(3, captures[1]);
  CHECK_EQ(1, captures[2]);
  CHECK_EQ(2, captures[3]);
  CHECK_EQ(84, captures[4]);

  Handle<String> non_matching =
      Factory::NewStringFromAscii(CStrVector("barfoo"));
  Handle<String> non_matching_16 = RegExpImpl::StringToTwoByte(non_matching);
  CHECK(!IrregexpInterpreter::Match(array, non_matching_16, captures, 0));
  CHECK_EQ(42, captures[0]);
}
|
||||
|
||||
|
||||
// Checks DispatchTableConstructor::AddInverse: after adding the inverse of a
// set of ranges for choice index 0, a position's out-set must contain 0
// exactly when the position lies in none of the ranges.
TEST(AddInverseToTable) {
  static const int kLimit = 1000;
  static const int kRangeCount = 16;
  for (int round = 0; round < 10; round++) {
    ZoneScope zone_scope(DELETE_ON_EXIT);
    ZoneList<CharacterRange>* ranges =
        new ZoneList<CharacterRange>(kRangeCount);
    for (int r = 0; r < kRangeCount; r++) {
      int from = PseudoRandom(round + 87, r + 25) % kLimit;
      int to = from + (PseudoRandom(r + 87, round + 25) % (kLimit / 20));
      if (to > kLimit) to = kLimit;
      ranges->Add(CharacterRange(from, to));
    }
    DispatchTable table;
    DispatchTableConstructor cons(&table);
    cons.set_choice_index(0);
    cons.AddInverse(ranges);
    for (int c = 0; c < kLimit; c++) {
      bool in_some_range = false;
      for (int r = 0; r < kRangeCount; r++) {
        if (ranges->at(r).Contains(c)) {
          in_some_range = true;
          break;
        }
      }
      OutSet* set = table.Get(c);
      CHECK_EQ(in_some_range, set->Get(0) == false);
    }
  }

  // A single range ending at 0xFFFE: 0xFFFF must be in the inverse.
  ZoneScope zone_scope(DELETE_ON_EXIT);
  ZoneList<CharacterRange>* ranges =
      new ZoneList<CharacterRange>(1);
  ranges->Add(CharacterRange(0xFFF0, 0xFFFE));
  DispatchTable table;
  DispatchTableConstructor cons(&table);
  cons.set_choice_index(0);
  cons.AddInverse(ranges);
  CHECK(!table.Get(0xFFFE)->Get(0));
  CHECK(table.Get(0xFFFF)->Get(0));
}
|
||||
|
||||
|
||||
// Applies unibrow::Ecma262Canonicalize::Convert to |c|. Returns |c| itself
// when the conversion yields no characters; otherwise requires exactly one
// result character and returns it.
static uc32 canonicalize(uc32 c) {
  unibrow::uchar converted[unibrow::Ecma262Canonicalize::kMaxWidth];
  int produced =
      unibrow::Ecma262Canonicalize::Convert(c, '\0', converted, NULL);
  if (produced == 0) return c;
  CHECK_EQ(1, produced);
  return converted[0];
}
|
||||
|
||||
|
||||
// Checks canonicalization of Latin letters and compares canonicalize()
// against a computation based on unibrow::ToUppercase for all values
// below 1 << 21.
TEST(LatinCanonicalize) {
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  // Each ASCII letter pair canonicalizes to the same value, and
  // un-canonicalizing the lower-case letter yields {upper, lower}.
  for (char lower = 'a'; lower <= 'z'; lower++) {
    char upper = lower + ('A' - 'a');
    CHECK_EQ(canonicalize(lower), canonicalize(upper));
    unibrow::uchar variants[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    int variant_count = un_canonicalize.get(lower, '\0', variants);
    CHECK_EQ(2, variant_count);
    CHECK_EQ(upper, variants[0]);
    CHECK_EQ(lower, variants[1]);
  }

  // Values from 128 up never canonicalize to a value below 128.
  for (uc32 c = 128; c < (1 << 21); c++) {
    CHECK_GE(canonicalize(c), 128);
  }

  // canonicalize() agrees with upper-casing, except that multi-character
  // results and results that would map a value >= 128 below 128 leave the
  // character unchanged.
  unibrow::Mapping<unibrow::ToUppercase> to_upper;
  for (uc32 c = 0; c < (1 << 21); c++) {
    unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
    int length = to_upper.get(c, '\0', upper);
    if (length == 0) {
      length = 1;
      upper[0] = c;
    }
    uc32 expected = upper[0];
    if (length > 1 || (c >= 128 && expected < 128)) {
      expected = c;
    }
    CHECK_EQ(expected, canonicalize(c));
  }
}
|
||||
|
||||
|
||||
// Compiles "(a|^b|c)" and checks that the resulting node's info has
// determine_start set.
TEST(SimplePropagation) {
  v8::HandleScope scope;
  ZoneScope zone_scope(DELETE_ON_EXIT);
  RegExpNode* node = Compile("(a|^b|c)");
  // Compile returns NULL when it cannot produce a node; report that as a
  // test failure instead of dereferencing a NULL pointer below.
  CHECK(node != NULL);
  CHECK(node->info()->determine_start);
}
|
||||
|
||||
|
||||
// Applies unibrow::CanonicalizationRange::Convert to |c|. Returns |c| itself
// when the conversion yields no characters; otherwise requires exactly one
// result character and returns it.
static uc32 CanonRange(uc32 c) {
  unibrow::uchar converted[unibrow::CanonicalizationRange::kMaxWidth];
  int produced =
      unibrow::CanonicalizationRange::Convert(c, '\0', converted, NULL);
  if (produced == 0) return c;
  CHECK_EQ(1, produced);
  return converted[0];
}
|
||||
|
||||
|
||||
// Cross-checks the canonicalization-range table (CanonRange) against direct
// un-canonicalization for all characters below
// CharacterRange::kRangeCanonicalizeMax.
TEST(RangeCanonicalization) {
  ASSERT((CanonRange(0) & CharacterRange::kStartMarker) != 0);
  // Check that we arrive at the same result when using the basic
  // range canonicalization primitives as when using immediate
  // canonicalization.
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  for (int i = 0; i < CharacterRange::kRangeCanonicalizeMax; i++) {
    int range = CanonRange(i);
    int indirect_length = 0;
    unibrow::uchar indirect[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    if ((range & CharacterRange::kStartMarker) == 0) {
      // Without the start marker, |range| is the offset of |i| from the
      // start of its block: un-canonicalize the block start and shift.
      indirect_length = un_canonicalize.get(i - range, '\0', indirect);
      // Separate index name so the outer loop variable is not shadowed.
      for (int j = 0; j < indirect_length; j++)
        indirect[j] += range;
    } else {
      indirect_length = un_canonicalize.get(i, '\0', indirect);
    }
    unibrow::uchar direct[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    int direct_length = un_canonicalize.get(i, '\0', direct);
    CHECK_EQ(direct_length, indirect_length);
  }
  // Check that we arrive at the same results when skipping over
  // canonicalization ranges.
  int next_block = 0;
  while (next_block < CharacterRange::kRangeCanonicalizeMax) {
    uc32 start = CanonRange(next_block);
    CHECK_NE((start & CharacterRange::kStartMarker), 0);
    unsigned dist = start & CharacterRange::kPayloadMask;
    // A zero-length block would never advance next_block; fail instead of
    // looping forever.
    CHECK(dist > 0);
    unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    int first_length = un_canonicalize.get(next_block, '\0', first);
    for (unsigned i = 1; i < dist; i++) {
      // The entry at offset i inside the current block must record that
      // offset.  (The lookup has to be relative to next_block; probing
      // CanonRange(i) would only ever inspect the first block.)
      CHECK_EQ(i, CanonRange(next_block + i));
      unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
      int succ_length = un_canonicalize.get(next_block + i, '\0', succ);
      CHECK_EQ(first_length, succ_length);
      for (int j = 0; j < succ_length; j++) {
        int calc = first[j] + i;
        int found = succ[j];
        CHECK_EQ(calc, found);
      }
    }
    next_block = next_block + dist;
  }
}
|
||||
|
||||
|
||||
// Calls input.AddCaseEquivalents on an empty list and checks that the list
// then holds exactly the ranges in |expected|, in the same order.
static void TestRangeCaseIndependence(CharacterRange input,
                                      Vector<CharacterRange> expected) {
  ZoneScope zone_scope(DELETE_ON_EXIT);
  int expected_count = expected.length();
  ZoneList<CharacterRange>* equivalents =
      new ZoneList<CharacterRange>(expected_count);
  input.AddCaseEquivalents(equivalents);
  CHECK_EQ(expected_count, equivalents->length());
  for (int n = 0; n < equivalents->length(); n++) {
    CharacterRange actual = equivalents->at(n);
    CHECK_EQ(expected[n].from(), actual.from());
    CHECK_EQ(expected[n].to(), actual.to());
  }
}
|
||||
|
||||
|
||||
// Convenience wrapper for the common case where the case equivalents of
// |input| are expected to be the single range |expected|.
static void TestSimpleRangeCaseIndependence(CharacterRange input,
                                            CharacterRange expected) {
  EmbeddedVector<CharacterRange, 1> single_expected;
  single_expected[0] = expected;
  TestRangeCaseIndependence(input, single_expected);
}
|
||||
|
||||
|
||||
// Checks the case-equivalent ranges produced for a number of ASCII ranges.
TEST(CharacterRangeCaseIndependence) {
  // Lower-case inputs yield the matching upper-case range.
  TestSimpleRangeCaseIndependence(CharacterRange::Singleton('a'),
                                  CharacterRange::Singleton('A'));
  TestSimpleRangeCaseIndependence(CharacterRange::Singleton('z'),
                                  CharacterRange::Singleton('Z'));
  TestSimpleRangeCaseIndependence(CharacterRange('a', 'z'),
                                  CharacterRange('A', 'Z'));
  TestSimpleRangeCaseIndependence(CharacterRange('c', 'f'),
                                  CharacterRange('C', 'F'));
  TestSimpleRangeCaseIndependence(CharacterRange('a', 'b'),
                                  CharacterRange('A', 'B'));
  TestSimpleRangeCaseIndependence(CharacterRange('y', 'z'),
                                  CharacterRange('Y', 'Z'));
  // Non-letters on either side of the range contribute nothing.
  TestSimpleRangeCaseIndependence(CharacterRange('a' - 1, 'z' + 1),
                                  CharacterRange('A', 'Z'));

  // Upper-case inputs yield the matching lower-case range.
  TestSimpleRangeCaseIndependence(CharacterRange('A', 'Z'),
                                  CharacterRange('a', 'z'));
  TestSimpleRangeCaseIndependence(CharacterRange('C', 'F'),
                                  CharacterRange('c', 'f'));
  TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1),
                                  CharacterRange('a', 'z'));

  // Here we need to add [l-z] to complete the case independence of
  // [A-Za-z] but we expect [a-z] to be added since we always add a
  // whole block at a time.
  TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'),
                                  CharacterRange('a', 'z'));
}
|
||||
|
||||
|
||||
// Initializes V8 and hands Execute a regexp consisting of an optional
// capture followed by a back reference to it.
// NOTE(review): the meaning of the "" and true arguments is defined by
// Execute, which is outside this chunk - confirm there.
TEST(Graph) {
  V8::Initialize(NULL);
  Execute("(x)?\\1y", "", true);
}
|
@ -26,5 +26,5 @@
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// Regression test for bug #743664.
|
||||
assertEquals("\x60\x60".replace(/\x60/g, "u"), "uu");
|
||||
assertEquals("\xAB\xAB".replace(/\xAB/g, "u"), "uu");
|
||||
assertEquals("uu", "\x60\x60".replace(/\x60/g, "u"));
|
||||
assertEquals("uu", "\xAB\xAB".replace(/\xAB/g, "u"));
|
||||
|
@ -89,7 +89,10 @@ assertEquals(result[6], 'F');
|
||||
// From ecma_3/RegExp/regress-334158.js
|
||||
assertTrue(/\ca/.test( "\x01" ));
|
||||
assertFalse(/\ca/.test( "\\ca" ));
|
||||
assertTrue(/\c[a/]/.test( "\x1ba/]" ));
|
||||
// Passes in KJS, fails in IrregularExpressions.
|
||||
// See http://code.google.com/p/v8/issues/detail?id=152
|
||||
//assertTrue(/\c[a/]/.test( "\x1ba/]" ));
|
||||
|
||||
|
||||
// Test that we handle \s and \S correctly inside some bizarre
|
||||
// character classes.
|
||||
|
28
test/mjsunit/regress/regress-149.js
Normal file
28
test/mjsunit/regress/regress-149.js
Normal file
@ -0,0 +1,28 @@
|
||||
// Copyright 2008 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
assertEquals(String.fromCharCode(0x26B), String.fromCharCode(0x2C62).toLowerCase());
|
@ -9134,6 +9134,32 @@ for (idx in languages) {
|
||||
assertEquals(munged_sizes[i - 1], munged.length, "munged size " + i);
|
||||
}
|
||||
|
||||
|
||||
// Returns the lower-case hexadecimal digit ("0"-"9", "a"-"f") for the
// low four bits of x; any higher bits are ignored.
function hex(x) {
  var nibble = x & 15;
  // 48 is the char code of "0"; 97 is the char code of "a".
  var offset = (nibble < 10) ? 48 : 97 - 10;
  return String.fromCharCode(nibble + offset);
}
|
||||
|
||||
|
||||
// Prints the string re with every character outside the printable ASCII
// range (32..126) replaced by an escape: \xHH for char codes below 256
// and \uHHHH for everything else.  Printable characters are emitted
// unchanged.
function dump_re(re) {
  var escaped = "";
  for (var pos = 0; pos < re.length; pos++) {
    var code = re.charCodeAt(pos);
    if (code >= 32 && code <= 126) {
      // Printable ASCII passes through as-is.
      escaped += re[pos];
      continue;
    }
    if (code < 256) {
      escaped += "\\x" + hex(code >> 4) + hex(code);
      continue;
    }
    escaped += "\\u" + hex(code >> 12) + hex(code >> 8) +
               hex(code >> 4) + hex(code);
  }
  print ("re = " + escaped);
}
|
||||
|
||||
var thai_l_thingy = "\u0e44";
|
||||
var thai_l_regexp = new RegExp(thai_l_thingy);
|
||||
var thai_l_regexp2 = new RegExp("[" + thai_l_thingy + "]");
|
||||
|
@ -217,7 +217,7 @@ ecma_3/RegExp/regress-57631: FAIL_OK
|
||||
# depth 500. KJS detects the case, and return null from the match,
|
||||
# and passes this test (the test doesn't check for a correct return
|
||||
# value).
|
||||
ecma_3/RegExp/regress-119909: FAIL_OK
|
||||
ecma_3/RegExp/regress-119909: PASS || FAIL_OK
|
||||
|
||||
|
||||
# Difference in the way capturing subpatterns work. In JS, when the
|
||||
@ -236,6 +236,13 @@ ecma_3/RegExp/regress-209919: FAIL_OK
|
||||
ecma_3/RegExp/regress-330684: FAIL_OK
|
||||
|
||||
|
||||
# This test contains a regexp that runs exponentially long. Spidermonkey
|
||||
# standalone will hang, though apparently inside Firefox it will trigger a
|
||||
# long-running-script timeout. JSCRE passes by hitting the matchLimit and
|
||||
# just pretending that an exhaustive search found no match.
|
||||
ecma_3/RegExp/regress-307456: PASS || FAIL_OK
|
||||
|
||||
|
||||
# We do not detect overflow in bounds for back references and {}
|
||||
# quantifiers. Might fix by parsing numbers differently?
|
||||
js1_5/Regress/regress-230216-2: FAIL_OK
|
||||
@ -247,11 +254,11 @@ js1_5/Regress/regress-247179: FAIL_OK
|
||||
|
||||
|
||||
# Regexp too long for PCRE.
|
||||
js1_5/Regress/regress-280769: FAIL_OK
|
||||
js1_5/Regress/regress-280769-1: FAIL_OK
|
||||
js1_5/Regress/regress-280769-2: FAIL_OK
|
||||
js1_5/Regress/regress-280769-4: FAIL_OK
|
||||
js1_5/Regress/regress-280769-5: FAIL_OK
|
||||
js1_5/Regress/regress-280769: PASS || FAIL
|
||||
js1_5/Regress/regress-280769-1: PASS || FAIL
|
||||
js1_5/Regress/regress-280769-2: PASS || FAIL
|
||||
js1_5/Regress/regress-280769-4: PASS || FAIL
|
||||
js1_5/Regress/regress-280769-5: PASS || FAIL
|
||||
|
||||
|
||||
# We do not support static RegExp.multiline - should we?
|
||||
@ -489,7 +496,7 @@ js1_5/Regress/regress-336100: FAIL_OK
|
||||
# behavior and not the ECMA spec.
|
||||
ecma_3/RegExp/15.10.2-1: FAIL_OK
|
||||
ecma_3/RegExp/perlstress-001: FAIL_OK
|
||||
ecma_3/RegExp/regress-334158: FAIL_OK
|
||||
ecma_3/RegExp/regress-334158: PASS || FAIL
|
||||
|
||||
|
||||
# This test requires a failure if we try to compile a function with more
|
||||
|
Loading…
Reference in New Issue
Block a user