* Fix regexp benchmark regression where we were doing work to make
  standard regexps like \s and . case independent.
* Make use of the fact that the subject string is ASCII only when
  making character classes case independent.
* Avoid spending time making large ideogram or punctuation ranges
  case independent when there is no case mapping anyway.

Review URL: http://codereview.chromium.org/378024

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@3243 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 493c9f072c
commit b068a9f755

178  src/jsregexp.cc
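Before the diff, a minimal standalone sketch (not the V8 implementation) of the ASCII clamp described in the second bullet: when the subject string is known to be ASCII only, case equivalents above 0x7f can never match, so a character range is clamped, or skipped outright, before any per-character work is done. All names below are invented for illustration, and plain ASCII arithmetic stands in for the unibrow uncanonicalize tables the real AddCaseEquivalents uses.

#include <cstdint>
#include <cstdio>
#include <vector>

struct Range { uint16_t from, to; };        // inclusive character range

const uint16_t kMaxAsciiCharCode = 0x7f;    // stand-in for String::kMaxAsciiCharCode

// Add case-equivalent singletons for [from, to], clamping first when the
// subject string is known to be ASCII only.
void AddAsciiCaseEquivalents(uint16_t from, uint16_t to, bool subject_is_ascii,
                             std::vector<Range>* out) {
  if (subject_is_ascii) {
    if (from > kMaxAsciiCharCode) return;                 // nothing can match
    if (to > kMaxAsciiCharCode) to = kMaxAsciiCharCode;   // clamp the top end
  }
  for (uint32_t c = from; c <= to; c++) {
    if (c >= 'a' && c <= 'z')
      out->push_back({uint16_t(c - 32), uint16_t(c - 32)});  // add 'A'..'Z'
    else if (c >= 'A' && c <= 'Z')
      out->push_back({uint16_t(c + 32), uint16_t(c + 32)});  // add 'a'..'z'
  }
}

int main() {
  std::vector<Range> out;
  // [a-\u044f] (a through Cyrillic ya) against an ASCII-only subject:
  // the clamp means only the 26 upper-case ASCII partners are added.
  AddAsciiCaseEquivalents('a', 0x044f, true, &out);
  std::printf("%zu equivalents added\n", out.size());
  return 0;
}

Compiled as ordinary C++, this prints that 26 equivalents were added: only the upper-case partners of a-z survive the clamp, even though the requested range runs far past ASCII.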
@@ -2432,16 +2432,19 @@ void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
 }
 
 
-void TextNode::MakeCaseIndependent() {
+void TextNode::MakeCaseIndependent(bool is_ascii) {
   int element_count = elms_->length();
   for (int i = 0; i < element_count; i++) {
     TextElement elm = elms_->at(i);
     if (elm.type == TextElement::CHAR_CLASS) {
       RegExpCharacterClass* cc = elm.data.u_char_class;
+      // None of the standard character classes is different in the case
+      // independent case and it slows us down if we don't know that.
+      if (cc->is_standard()) continue;
       ZoneList<CharacterRange>* ranges = cc->ranges();
       int range_count = ranges->length();
       for (int j = 0; j < range_count; j++) {
-        ranges->at(j).AddCaseEquivalents(ranges);
+        ranges->at(j).AddCaseEquivalents(ranges, is_ascii);
       }
     }
   }
@@ -3912,19 +3915,31 @@ void CharacterRange::Split(ZoneList<CharacterRange>* base,
 }
 
 
-void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) {
+static void AddUncanonicals(ZoneList<CharacterRange>* ranges,
+                            int bottom,
+                            int top);
+
+
+void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
+                                        bool is_ascii) {
+  uc16 bottom = from();
+  uc16 top = to();
+  if (is_ascii) {
+    if (bottom > String::kMaxAsciiCharCode) return;
+    if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode;
+  }
   unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
-  if (IsSingleton()) {
+  if (top == bottom) {
     // If this is a singleton we just expand the one character.
-    int length = uncanonicalize.get(from(), '\0', chars);
+    int length = uncanonicalize.get(bottom, '\0', chars);
     for (int i = 0; i < length; i++) {
       uc32 chr = chars[i];
-      if (chr != from()) {
+      if (chr != bottom) {
         ranges->Add(CharacterRange::Singleton(chars[i]));
       }
     }
-  } else if (from() <= kRangeCanonicalizeMax &&
-             to() <= kRangeCanonicalizeMax) {
+  } else if (bottom <= kRangeCanonicalizeMax &&
+             top <= kRangeCanonicalizeMax) {
     // If this is a range we expand the characters block by block,
     // expanding contiguous subranges (blocks) one at a time.
     // The approach is as follows. For a given start character we
@@ -3943,14 +3958,14 @@ void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) {
     // completely contained in a block we do this for all the blocks
     // covered by the range.
     unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
-    // First, look up the block that contains the 'from' character.
-    int length = canonrange.get(from(), '\0', range);
+    // First, look up the block that contains the 'bottom' character.
+    int length = canonrange.get(bottom, '\0', range);
     if (length == 0) {
-      range[0] = from();
+      range[0] = bottom;
     } else {
       ASSERT_EQ(1, length);
     }
-    int pos = from();
+    int pos = bottom;
     // The start of the current block. Note that except for the first
     // iteration 'start' is always equal to 'pos'.
     int start;
@@ -3964,7 +3979,7 @@ void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) {
     // Then we add the ranges one at a time, incrementing the current
     // position to be after the last block each time. The position
     // always points to the start of a block.
-    while (pos < to()) {
+    while (pos < top) {
       length = canonrange.get(start, '\0', range);
       if (length == 0) {
         range[0] = start;
@@ -3975,57 +3990,122 @@ void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) {
       // The start point of a block contains the distance to the end
       // of the range.
       int block_end = start + (range[0] & kPayloadMask) - 1;
-      int end = (block_end > to()) ? to() : block_end;
+      int end = (block_end > top) ? top : block_end;
       length = uncanonicalize.get(start, '\0', range);
       for (int i = 0; i < length; i++) {
         uc32 c = range[i];
         uc16 range_from = c + (pos - start);
         uc16 range_to = c + (end - start);
-        if (!(from() <= range_from && range_to <= to())) {
+        if (!(bottom <= range_from && range_to <= top)) {
           ranges->Add(CharacterRange(range_from, range_to));
         }
       }
       start = pos = block_end + 1;
     }
-  } else if (from() > 0 || to() < String::kMaxUC16CharCode) {
+  } else {
     // Unibrow ranges don't work for high characters due to the "2^11 bug".
-    // Therefore we do something dumber for these ranges. We don't bother
-    // if the range is 0-max (as encountered at the start of an unanchored
-    // regexp).
-    ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);
-    int bottom = from();
-    int top = to();
-    for (int i = bottom; i <= top; i++) {
-      int length = uncanonicalize.get(i, '\0', chars);
-      for (int j = 0; j < length; j++) {
-        uc32 chr = chars[j];
-        if (chr != i && chr < bottom || chr > top) {
-          characters->Add(chr);
+    // Therefore we do something dumber for these ranges.
+    AddUncanonicals(ranges, bottom, top);
+  }
+}
+
+
+static void AddUncanonicals(ZoneList<CharacterRange>* ranges,
+                            int bottom,
+                            int top) {
+  unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
+  // Zones with no case mappings. There is a DEBUG-mode loop to assert that
+  // this table is correct.
+  // 0x0600 - 0x0fff
+  // 0x1100 - 0x1cff
+  // 0x2000 - 0x20ff
+  // 0x2200 - 0x23ff
+  // 0x2500 - 0x2bff
+  // 0x2e00 - 0xa5ff
+  // 0xa800 - 0xfaff
+  // 0xfc00 - 0xfeff
+  const int boundary_count = 18;
+  // The ASCII boundary and the kRangeCanonicalizeMax boundary are also in this
+  // array. This is to split up big ranges and not because they actually denote
+  // a case-mapping-free-zone.
+  ASSERT(CharacterRange::kRangeCanonicalizeMax < 0x600);
+  const int kFirstRealCaselessZoneIndex = 2;
+  int boundaries[] = {0x80, CharacterRange::kRangeCanonicalizeMax,
+      0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500,
+      0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00};
+
+  // Special ASCII rule from spec can save us some work here.
+  if (bottom == 0x80 && top == 0xffff) return;
+
+  // We have optimized support for this range.
+  if (top <= CharacterRange::kRangeCanonicalizeMax) {
+    CharacterRange range(bottom, top);
+    range.AddCaseEquivalents(ranges, false);
+    return;
+  }
+
+  // Split up very large ranges. This helps remove ranges where there are no
+  // case mappings.
+  for (int i = 0; i < boundary_count; i++) {
+    if (bottom < boundaries[i] && top >= boundaries[i]) {
+      AddUncanonicals(ranges, bottom, boundaries[i] - 1);
+      AddUncanonicals(ranges, boundaries[i], top);
+      return;
+    }
+  }
+
+  // If we are completely in a zone with no case mappings then we are done.
+  // We start at 2 so as not to except the ASCII range from mappings.
+  for (int i = kFirstRealCaselessZoneIndex; i < boundary_count; i += 2) {
+    if (bottom >= boundaries[i] && top < boundaries[i + 1]) {
+#ifdef DEBUG
+      for (int j = bottom; j <= top; j++) {
+        unsigned current_char = j;
+        int length = uncanonicalize.get(current_char, '\0', chars);
+        for (int k = 0; k < length; k++) {
+          ASSERT(chars[k] == current_char);
         }
       }
+#endif
+      return;
+    }
+  }
+
+  // Step through the range finding equivalent characters.
+  ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);
+  for (int i = bottom; i <= top; i++) {
+    int length = uncanonicalize.get(i, '\0', chars);
+    for (int j = 0; j < length; j++) {
+      uc32 chr = chars[j];
+      if (chr != i && chr < bottom || chr > top) {
+        characters->Add(chr);
+      }
     }
-    if (characters->length() > 0) {
-      int new_from = characters->at(0);
-      int new_to = new_from;
-      for (int i = 1; i < characters->length(); i++) {
-        int chr = characters->at(i);
-        if (chr == new_to + 1) {
-          new_to++;
-        } else {
-          if (new_to == new_from) {
-            ranges->Add(CharacterRange::Singleton(new_from));
-          } else {
-            ranges->Add(CharacterRange(new_from, new_to));
-          }
-          new_from = new_to = chr;
-        }
-      }
-      if (new_to == new_from) {
-        ranges->Add(CharacterRange::Singleton(new_from));
+  }
+
+  // Step through the equivalent characters finding simple ranges and
+  // adding ranges to the character class.
+  if (characters->length() > 0) {
+    int new_from = characters->at(0);
+    int new_to = new_from;
+    for (int i = 1; i < characters->length(); i++) {
+      int chr = characters->at(i);
+      if (chr == new_to + 1) {
+        new_to++;
       } else {
-        ranges->Add(CharacterRange(new_from, new_to));
+        if (new_to == new_from) {
+          ranges->Add(CharacterRange::Singleton(new_from));
+        } else {
+          ranges->Add(CharacterRange(new_from, new_to));
+        }
+        new_from = new_to = chr;
       }
     }
+    if (new_to == new_from) {
+      ranges->Add(CharacterRange::Singleton(new_from));
+    } else {
+      ranges->Add(CharacterRange(new_from, new_to));
+    }
   }
 }
 
@@ -4271,7 +4351,7 @@ void TextNode::CalculateOffsets() {
 
 void Analysis::VisitText(TextNode* that) {
   if (ignore_case_) {
-    that->MakeCaseIndependent();
+    that->MakeCaseIndependent(is_ascii_);
   }
   EnsureAnalyzed(that->on_success());
   if (!has_failed()) {
@@ -4489,7 +4569,7 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data,
     }
   }
   data->node = node;
-  Analysis analysis(ignore_case);
+  Analysis analysis(ignore_case, is_ascii);
   analysis.EnsureAnalyzed(node);
   if (analysis.has_failed()) {
     const char* error_message = analysis.error_message();
@@ -200,7 +200,7 @@ class CharacterRange {
   bool is_valid() { return from_ <= to_; }
   bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
   bool IsSingleton() { return (from_ == to_); }
-  void AddCaseEquivalents(ZoneList<CharacterRange>* ranges);
+  void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii);
   static void Split(ZoneList<CharacterRange>* base,
                     Vector<const uc16> overlay,
                     ZoneList<CharacterRange>** included,
@@ -703,7 +703,7 @@ class TextNode: public SeqRegExpNode {
                                     int characters_filled_in,
                                     bool not_at_start);
   ZoneList<TextElement>* elements() { return elms_; }
-  void MakeCaseIndependent();
+  void MakeCaseIndependent(bool is_ascii);
   virtual int GreedyLoopTextLength();
   virtual TextNode* Clone() {
     TextNode* result = new TextNode(*this);
@@ -1212,8 +1212,10 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)
 // +-------+ +------------+
 class Analysis: public NodeVisitor {
  public:
-  explicit Analysis(bool ignore_case)
-      : ignore_case_(ignore_case), error_message_(NULL) { }
+  Analysis(bool ignore_case, bool is_ascii)
+      : ignore_case_(ignore_case),
+        is_ascii_(is_ascii),
+        error_message_(NULL) { }
   void EnsureAnalyzed(RegExpNode* node);
 
 #define DECLARE_VISIT(Type) \
@@ -1232,6 +1234,7 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)
   }
  private:
   bool ignore_case_;
+  bool is_ascii_;
   const char* error_message_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
@@ -1466,7 +1466,7 @@ static void TestRangeCaseIndependence(CharacterRange input,
   ZoneScope zone_scope(DELETE_ON_EXIT);
   int count = expected.length();
   ZoneList<CharacterRange>* list = new ZoneList<CharacterRange>(count);
-  input.AddCaseEquivalents(list);
+  input.AddCaseEquivalents(list, false);
   CHECK_EQ(count, list->length());
   for (int i = 0; i < list->length(); i++) {
     CHECK_EQ(expected[i].from(), list->at(i).from());
@@ -60,6 +60,7 @@ function Range(from, to, flags) {
   return new RegExp("[" + from + "-" + to + "]", flags);
 }
 
+// Test Cyrillic and Greek separately.
 for (var lang = 0; lang < 2; lang++) {
   var chars = (lang == 0) ? cyrillic : greek;
 
@@ -99,6 +100,7 @@ for (var lang = 0; lang < 2; lang++) {
   }
 }
 
+// Test range that covers both greek and cyrillic characters.
 for (key in greek) {
   assertTrue(Range(greek.FIRST, cyrillic.last).test(greek[key]), 17 + key);
   if (cyrillic[key]) {
@@ -106,7 +108,6 @@ for (key in greek) {
   }
 }
 
-
 for (var i = 0; i < 2; i++) {
   var ignore_case = (i == 0);
   var flag = ignore_case ? "i" : "";
@@ -118,6 +119,8 @@ for (var i = 0; i < 2; i++) {
   assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.MIDDLE), 23);
   assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.LAST), 24);
 
+  // A range that covers the lower case greek letters and the upper case cyrillic
+  // letters.
   assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.FIRST), 25);
   assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.MIDDLE), 26);
   assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.LAST), 27);
@@ -128,6 +131,10 @@ for (var i = 0; i < 2; i++) {
 }
 
 
+// Sigma is special because there are two lower case versions of the same upper
+// case character. JS requires that case independence means that you should
+// convert everything to upper case, so the two sigma variants are equal to each
+// other in a case independent comparison.
 for (var i = 0; i < 2; i++) {
   var simple = (i != 0);
   var name = simple ? "" : "[]";
@@ -166,4 +173,36 @@ for (var i = 0; i < 2; i++) {
   assertTrue(new RegExp(regex, "i").test(SIGMA), 56 + name);
 }
 
-print("ok");
+
+// Test all non-ASCII characters individually to ensure that our optimizations
+// didn't break anything.
+for (var i = 0x80; i <= 0xfffe; i++) {
+  var c = String.fromCharCode(i);
+  var c2 = String.fromCharCode(i + 1);
+  var re = new RegExp("[" + c + "-" + c2 + "]", "i");
+  assertTrue(re.test(c), 57);
+}
+
+for (var add_non_ascii_character_to_subject = 0;
+     add_non_ascii_character_to_subject < 2;
+     add_non_ascii_character_to_subject++) {
+  var suffix = add_non_ascii_character_to_subject ? "\ufffe" : "";
+  // A range that covers both ASCII and non-ASCII.
+  for (var i = 0; i < 2; i++) {
+    var full = (i != 0);
+    var mixed = full ? "[a-\uffff]" : "[a-" + cyrillic.LAST + "]";
+    var f = full ? "f" : "c";
+    for (var j = 0; j < 2; j++) {
+      var ignore_case = (j == 0);
+      var flag = ignore_case ? "i" : "";
+      var re = new RegExp(mixed, flag);
+      assertEquals(ignore_case || (full && add_non_ascii_character_to_subject),
+                   re.test("A" + suffix),
+                   58 + flag + f);
+      assertTrue(re.test("a" + suffix), 59 + flag + f);
+      assertTrue(re.test("~" + suffix), 60 + flag + f);
+      assertTrue(re.test(cyrillic.MIDDLE), 61 + flag + f);
+      assertEquals(ignore_case || full, re.test(cyrillic.middle), 62 + flag + f);
+    }
+  }
+}
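For reference, a rough standalone sketch (again not the V8 code) of the zone-splitting strategy that the new AddUncanonicals function above uses for the third bullet of the commit message: a table of boundaries marks stretches of the BMP with no case mappings, a large range is split at those boundaries, and any piece that falls entirely inside a caseless zone is skipped without per-character work. The two zones and all helper names below are hypothetical stand-ins kept short for the example; the real table has eight caseless stretches plus extra split points.

#include <cstdio>
#include <initializer_list>
#include <vector>

struct Zone { int from, to; };  // inclusive bounds

// Hypothetical caseless zones; the real table in AddUncanonicals has eight
// such stretches plus the ASCII and kRangeCanonicalizeMax split points.
const Zone kCaselessZones[] = {{0x0600, 0x0fff}, {0x2e00, 0xa5ff}};

// Collect the sub-ranges of [bottom, top] that still need a per-character
// scan, discarding pieces that lie entirely inside a caseless zone.
void AddUncanonicalsSketch(int bottom, int top, std::vector<Zone>* to_scan) {
  for (const Zone& z : kCaselessZones) {
    if (bottom >= z.from && top <= z.to) return;  // whole piece is caseless
  }
  for (const Zone& z : kCaselessZones) {
    for (int boundary : {z.from, z.to + 1}) {     // split at zone edges
      if (bottom < boundary && top >= boundary) {
        AddUncanonicalsSketch(bottom, boundary - 1, to_scan);
        AddUncanonicalsSketch(boundary, top, to_scan);
        return;
      }
    }
  }
  to_scan->push_back({bottom, top});              // this piece pays the cost
}

int main() {
  std::vector<Zone> pieces;
  AddUncanonicalsSketch(0x0500, 0x1200, &pieces);  // straddles 0x0600-0x0fff
  for (const Zone& p : pieces)
    std::printf("would scan 0x%04x-0x%04x\n", p.from, p.to);
  return 0;
}

Run on a range that straddles the 0x0600-0x0fff zone, it reports that only 0x0500-0x05ff and 0x1000-0x1200 would need a character-by-character scan.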