[loong64][mips][regexp] Add dedicated enums for standard character sets

Port commit b4aa41d0fc

Change-Id: I00e7b81450a1a751b536d29bc4bb4b69ad57b7c6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3218720
Auto-Submit: Liu yu <liuyu@loongson.cn>
Reviewed-by: Zhao Jiazhong <zhaojiazhong-hf@loongson.cn>
Commit-Queue: Zhao Jiazhong <zhaojiazhong-hf@loongson.cn>
Cr-Commit-Position: refs/heads/main@{#77351}
This commit is contained in:
Liu Yu 2021-10-12 20:33:24 +08:00 committed by V8 LUCI CQ
parent 7fdf5e141e
commit 728e209030
6 changed files with 199 additions and 202 deletions

View File

@@ -476,11 +476,12 @@ void RegExpMacroAssemblerLOONG64::CheckBitInTable(Handle<ByteArray> table,
}
bool RegExpMacroAssemblerLOONG64::CheckSpecialCharacterClass(
base::uc16 type, Label* on_no_match) {
StandardCharacterSet type, Label* on_no_match) {
// Range checks (c in min..max) are generally implemented by an unsigned
// (c - min) <= (max - min) check.
// TODO(jgruber): No custom implementation (yet): s(UC16), S(UC16).
switch (type) {
case 's':
case StandardCharacterSet::kWhitespace:
// Match space-characters.
if (mode_ == LATIN1) {
// One byte space characters are '\t'..'\r', ' ' and \u00a0.
@@ -495,20 +496,20 @@ bool RegExpMacroAssemblerLOONG64::CheckSpecialCharacterClass(
return true;
}
return false;
case 'S':
case StandardCharacterSet::kNotWhitespace:
// The emitted code for generic character classes is good enough.
return false;
case 'd':
case StandardCharacterSet::kDigit:
// Match Latin1 digits ('0'..'9').
__ Sub_d(a0, current_character(), Operand('0'));
BranchOrBacktrack(on_no_match, hi, a0, Operand('9' - '0'));
return true;
case 'D':
case StandardCharacterSet::kNotDigit:
// Match non Latin1-digits.
__ Sub_d(a0, current_character(), Operand('0'));
BranchOrBacktrack(on_no_match, ls, a0, Operand('9' - '0'));
return true;
case '.': {
case StandardCharacterSet::kNotLineTerminator: {
// Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
@@ -523,7 +524,7 @@ bool RegExpMacroAssemblerLOONG64::CheckSpecialCharacterClass(
}
return true;
}
case 'n': {
case StandardCharacterSet::kLineTerminator: {
// Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
@@ -542,7 +543,7 @@ bool RegExpMacroAssemblerLOONG64::CheckSpecialCharacterClass(
}
return true;
}
case 'w': {
case StandardCharacterSet::kWord: {
if (mode_ != LATIN1) {
// Table is 256 entries, so all Latin1 characters can be tested.
BranchOrBacktrack(on_no_match, hi, current_character(), Operand('z'));
@@ -554,7 +555,7 @@ bool RegExpMacroAssemblerLOONG64::CheckSpecialCharacterClass(
BranchOrBacktrack(on_no_match, eq, a0, Operand(zero_reg));
return true;
}
case 'W': {
case StandardCharacterSet::kNotWord: {
Label done;
if (mode_ != LATIN1) {
// Table is 256 entries, so all Latin1 characters can be tested.
@@ -570,12 +571,9 @@ bool RegExpMacroAssemblerLOONG64::CheckSpecialCharacterClass(
}
return true;
}
case '*':
case StandardCharacterSet::kEverything:
// Match any character.
return true;
// No custom implementation (yet): s(UC16), S(UC16).
default:
return false;
}
}

View File

@@ -52,7 +52,8 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerLOONG64
// Checks whether the given offset from the current position is before
// the end of the string.
virtual void CheckPosition(int cp_offset, Label* on_outside_input);
virtual bool CheckSpecialCharacterClass(base::uc16 type, Label* on_no_match);
virtual bool CheckSpecialCharacterClass(StandardCharacterSet type,
Label* on_no_match);
virtual void Fail();
virtual Handle<HeapObject> GetCode(Handle<String> source);
virtual void GoTo(Label* label);

View File

@@ -491,107 +491,105 @@ void RegExpMacroAssemblerMIPS::CheckBitInTable(
BranchOrBacktrack(on_bit_set, ne, a0, Operand(zero_reg));
}
bool RegExpMacroAssemblerMIPS::CheckSpecialCharacterClass(base::uc16 type,
Label* on_no_match) {
bool RegExpMacroAssemblerMIPS::CheckSpecialCharacterClass(
StandardCharacterSet type, Label* on_no_match) {
// Range checks (c in min..max) are generally implemented by an unsigned
// (c - min) <= (max - min) check.
// TODO(jgruber): No custom implementation (yet): s(UC16), S(UC16).
switch (type) {
case 's':
// Match space-characters.
if (mode_ == LATIN1) {
// One byte space characters are '\t'..'\r', ' ' and \u00a0.
Label success;
__ Branch(&success, eq, current_character(), Operand(' '));
// Check range 0x09..0x0D.
__ Subu(a0, current_character(), Operand('\t'));
__ Branch(&success, ls, a0, Operand('\r' - '\t'));
// \u00a0 (NBSP).
BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00A0 - '\t'));
__ bind(&success);
case StandardCharacterSet::kWhitespace:
// Match space-characters.
if (mode_ == LATIN1) {
// One byte space characters are '\t'..'\r', ' ' and \u00a0.
Label success;
__ Branch(&success, eq, current_character(), Operand(' '));
// Check range 0x09..0x0D.
__ Subu(a0, current_character(), Operand('\t'));
__ Branch(&success, ls, a0, Operand('\r' - '\t'));
// \u00a0 (NBSP).
BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00A0 - '\t'));
__ bind(&success);
return true;
}
return false;
case StandardCharacterSet::kNotWhitespace:
// The emitted code for generic character classes is good enough.
return false;
case StandardCharacterSet::kDigit:
// Match Latin1 digits ('0'..'9').
__ Subu(a0, current_character(), Operand('0'));
BranchOrBacktrack(on_no_match, hi, a0, Operand('9' - '0'));
return true;
case StandardCharacterSet::kNotDigit:
// Match non Latin1-digits.
__ Subu(a0, current_character(), Operand('0'));
BranchOrBacktrack(on_no_match, ls, a0, Operand('9' - '0'));
return true;
case StandardCharacterSet::kNotLineTerminator: {
// Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
__ Subu(a0, a0, Operand(0x0B));
BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0C - 0x0B));
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
// computed (current_char ^ 0x01 - 0x0B). I.e., check for
// 0x201D (0x2028 - 0x0B) or 0x201E.
__ Subu(a0, a0, Operand(0x2028 - 0x0B));
BranchOrBacktrack(on_no_match, ls, a0, Operand(1));
}
return true;
}
return false;
case 'S':
// The emitted code for generic character classes is good enough.
return false;
case 'd':
// Match Latin1 digits ('0'..'9').
__ Subu(a0, current_character(), Operand('0'));
BranchOrBacktrack(on_no_match, hi, a0, Operand('9' - '0'));
return true;
case 'D':
// Match non Latin1-digits.
__ Subu(a0, current_character(), Operand('0'));
BranchOrBacktrack(on_no_match, ls, a0, Operand('9' - '0'));
return true;
case '.': {
// Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
__ Subu(a0, a0, Operand(0x0B));
BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0C - 0x0B));
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
// computed (current_char ^ 0x01 - 0x0B). I.e., check for
// 0x201D (0x2028 - 0x0B) or 0x201E.
__ Subu(a0, a0, Operand(0x2028 - 0x0B));
BranchOrBacktrack(on_no_match, ls, a0, Operand(1));
case StandardCharacterSet::kLineTerminator: {
// Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
__ Subu(a0, a0, Operand(0x0B));
if (mode_ == LATIN1) {
BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0C - 0x0B));
} else {
Label done;
BranchOrBacktrack(&done, ls, a0, Operand(0x0C - 0x0B));
// Compare original value to 0x2028 and 0x2029, using the already
// computed (current_char ^ 0x01 - 0x0B). I.e., check for
// 0x201D (0x2028 - 0x0B) or 0x201E.
__ Subu(a0, a0, Operand(0x2028 - 0x0B));
BranchOrBacktrack(on_no_match, hi, a0, Operand(1));
__ bind(&done);
}
return true;
}
return true;
}
case 'n': {
// Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
__ Subu(a0, a0, Operand(0x0B));
if (mode_ == LATIN1) {
BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0C - 0x0B));
} else {
case StandardCharacterSet::kWord: {
if (mode_ != LATIN1) {
// Table is 256 entries, so all Latin1 characters can be tested.
BranchOrBacktrack(on_no_match, hi, current_character(), Operand('z'));
}
ExternalReference map = ExternalReference::re_word_character_map();
__ li(a0, Operand(map));
__ Addu(a0, a0, current_character());
__ lbu(a0, MemOperand(a0, 0));
BranchOrBacktrack(on_no_match, eq, a0, Operand(zero_reg));
return true;
}
case StandardCharacterSet::kNotWord: {
Label done;
BranchOrBacktrack(&done, ls, a0, Operand(0x0C - 0x0B));
// Compare original value to 0x2028 and 0x2029, using the already
// computed (current_char ^ 0x01 - 0x0B). I.e., check for
// 0x201D (0x2028 - 0x0B) or 0x201E.
__ Subu(a0, a0, Operand(0x2028 - 0x0B));
BranchOrBacktrack(on_no_match, hi, a0, Operand(1));
__ bind(&done);
if (mode_ != LATIN1) {
// Table is 256 entries, so all Latin1 characters can be tested.
__ Branch(&done, hi, current_character(), Operand('z'));
}
ExternalReference map = ExternalReference::re_word_character_map();
__ li(a0, Operand(map));
__ Addu(a0, a0, current_character());
__ lbu(a0, MemOperand(a0, 0));
BranchOrBacktrack(on_no_match, ne, a0, Operand(zero_reg));
if (mode_ != LATIN1) {
__ bind(&done);
}
return true;
}
return true;
}
case 'w': {
if (mode_ != LATIN1) {
// Table is 256 entries, so all Latin1 characters can be tested.
BranchOrBacktrack(on_no_match, hi, current_character(), Operand('z'));
}
ExternalReference map = ExternalReference::re_word_character_map();
__ li(a0, Operand(map));
__ Addu(a0, a0, current_character());
__ lbu(a0, MemOperand(a0, 0));
BranchOrBacktrack(on_no_match, eq, a0, Operand(zero_reg));
return true;
}
case 'W': {
Label done;
if (mode_ != LATIN1) {
// Table is 256 entries, so all Latin1 characters can be tested.
__ Branch(&done, hi, current_character(), Operand('z'));
}
ExternalReference map = ExternalReference::re_word_character_map();
__ li(a0, Operand(map));
__ Addu(a0, a0, current_character());
__ lbu(a0, MemOperand(a0, 0));
BranchOrBacktrack(on_no_match, ne, a0, Operand(zero_reg));
if (mode_ != LATIN1) {
__ bind(&done);
}
return true;
}
case '*':
// Match any character.
return true;
// No custom implementation (yet): s(UC16), S(UC16).
default:
return false;
case StandardCharacterSet::kEverything:
// Match any character.
return true;
}
}

View File

@@ -54,7 +54,8 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS
// Checks whether the given offset from the current position is before
// the end of the string.
virtual void CheckPosition(int cp_offset, Label* on_outside_input);
virtual bool CheckSpecialCharacterClass(base::uc16 type, Label* on_no_match);
virtual bool CheckSpecialCharacterClass(StandardCharacterSet type,
Label* on_no_match);
virtual void Fail();
virtual Handle<HeapObject> GetCode(Handle<String> source);
virtual void GoTo(Label* label);

View File

@@ -523,107 +523,105 @@ void RegExpMacroAssemblerMIPS::CheckBitInTable(
BranchOrBacktrack(on_bit_set, ne, a0, Operand(zero_reg));
}
bool RegExpMacroAssemblerMIPS::CheckSpecialCharacterClass(base::uc16 type,
Label* on_no_match) {
bool RegExpMacroAssemblerMIPS::CheckSpecialCharacterClass(
StandardCharacterSet type, Label* on_no_match) {
// Range checks (c in min..max) are generally implemented by an unsigned
// (c - min) <= (max - min) check.
// TODO(jgruber): No custom implementation (yet): s(UC16), S(UC16).
switch (type) {
case 's':
// Match space-characters.
if (mode_ == LATIN1) {
// One byte space characters are '\t'..'\r', ' ' and \u00a0.
Label success;
__ Branch(&success, eq, current_character(), Operand(' '));
// Check range 0x09..0x0D.
__ Dsubu(a0, current_character(), Operand('\t'));
__ Branch(&success, ls, a0, Operand('\r' - '\t'));
// \u00a0 (NBSP).
BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00A0 - '\t'));
__ bind(&success);
case StandardCharacterSet::kWhitespace:
// Match space-characters.
if (mode_ == LATIN1) {
// One byte space characters are '\t'..'\r', ' ' and \u00a0.
Label success;
__ Branch(&success, eq, current_character(), Operand(' '));
// Check range 0x09..0x0D.
__ Dsubu(a0, current_character(), Operand('\t'));
__ Branch(&success, ls, a0, Operand('\r' - '\t'));
// \u00a0 (NBSP).
BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00A0 - '\t'));
__ bind(&success);
return true;
}
return false;
case StandardCharacterSet::kNotWhitespace:
// The emitted code for generic character classes is good enough.
return false;
case StandardCharacterSet::kDigit:
// Match Latin1 digits ('0'..'9').
__ Dsubu(a0, current_character(), Operand('0'));
BranchOrBacktrack(on_no_match, hi, a0, Operand('9' - '0'));
return true;
case StandardCharacterSet::kNotDigit:
// Match non Latin1-digits.
__ Dsubu(a0, current_character(), Operand('0'));
BranchOrBacktrack(on_no_match, ls, a0, Operand('9' - '0'));
return true;
case StandardCharacterSet::kNotLineTerminator: {
// Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
__ Dsubu(a0, a0, Operand(0x0B));
BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0C - 0x0B));
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
// computed (current_char ^ 0x01 - 0x0B). I.e., check for
// 0x201D (0x2028 - 0x0B) or 0x201E.
__ Dsubu(a0, a0, Operand(0x2028 - 0x0B));
BranchOrBacktrack(on_no_match, ls, a0, Operand(1));
}
return true;
}
return false;
case 'S':
// The emitted code for generic character classes is good enough.
return false;
case 'd':
// Match Latin1 digits ('0'..'9').
__ Dsubu(a0, current_character(), Operand('0'));
BranchOrBacktrack(on_no_match, hi, a0, Operand('9' - '0'));
return true;
case 'D':
// Match non Latin1-digits.
__ Dsubu(a0, current_character(), Operand('0'));
BranchOrBacktrack(on_no_match, ls, a0, Operand('9' - '0'));
return true;
case '.': {
// Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
__ Dsubu(a0, a0, Operand(0x0B));
BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0C - 0x0B));
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
// computed (current_char ^ 0x01 - 0x0B). I.e., check for
// 0x201D (0x2028 - 0x0B) or 0x201E.
__ Dsubu(a0, a0, Operand(0x2028 - 0x0B));
BranchOrBacktrack(on_no_match, ls, a0, Operand(1));
case StandardCharacterSet::kLineTerminator: {
// Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
__ Dsubu(a0, a0, Operand(0x0B));
if (mode_ == LATIN1) {
BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0C - 0x0B));
} else {
Label done;
BranchOrBacktrack(&done, ls, a0, Operand(0x0C - 0x0B));
// Compare original value to 0x2028 and 0x2029, using the already
// computed (current_char ^ 0x01 - 0x0B). I.e., check for
// 0x201D (0x2028 - 0x0B) or 0x201E.
__ Dsubu(a0, a0, Operand(0x2028 - 0x0B));
BranchOrBacktrack(on_no_match, hi, a0, Operand(1));
__ bind(&done);
}
return true;
}
return true;
}
case 'n': {
// Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
__ Dsubu(a0, a0, Operand(0x0B));
if (mode_ == LATIN1) {
BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0C - 0x0B));
} else {
case StandardCharacterSet::kWord: {
if (mode_ != LATIN1) {
// Table is 256 entries, so all Latin1 characters can be tested.
BranchOrBacktrack(on_no_match, hi, current_character(), Operand('z'));
}
ExternalReference map = ExternalReference::re_word_character_map();
__ li(a0, Operand(map));
__ Daddu(a0, a0, current_character());
__ Lbu(a0, MemOperand(a0, 0));
BranchOrBacktrack(on_no_match, eq, a0, Operand(zero_reg));
return true;
}
case StandardCharacterSet::kNotWord: {
Label done;
BranchOrBacktrack(&done, ls, a0, Operand(0x0C - 0x0B));
// Compare original value to 0x2028 and 0x2029, using the already
// computed (current_char ^ 0x01 - 0x0B). I.e., check for
// 0x201D (0x2028 - 0x0B) or 0x201E.
__ Dsubu(a0, a0, Operand(0x2028 - 0x0B));
BranchOrBacktrack(on_no_match, hi, a0, Operand(1));
__ bind(&done);
if (mode_ != LATIN1) {
// Table is 256 entries, so all Latin1 characters can be tested.
__ Branch(&done, hi, current_character(), Operand('z'));
}
ExternalReference map = ExternalReference::re_word_character_map();
__ li(a0, Operand(map));
__ Daddu(a0, a0, current_character());
__ Lbu(a0, MemOperand(a0, 0));
BranchOrBacktrack(on_no_match, ne, a0, Operand(zero_reg));
if (mode_ != LATIN1) {
__ bind(&done);
}
return true;
}
return true;
}
case 'w': {
if (mode_ != LATIN1) {
// Table is 256 entries, so all Latin1 characters can be tested.
BranchOrBacktrack(on_no_match, hi, current_character(), Operand('z'));
}
ExternalReference map = ExternalReference::re_word_character_map();
__ li(a0, Operand(map));
__ Daddu(a0, a0, current_character());
__ Lbu(a0, MemOperand(a0, 0));
BranchOrBacktrack(on_no_match, eq, a0, Operand(zero_reg));
return true;
}
case 'W': {
Label done;
if (mode_ != LATIN1) {
// Table is 256 entries, so all Latin1 characters can be tested.
__ Branch(&done, hi, current_character(), Operand('z'));
}
ExternalReference map = ExternalReference::re_word_character_map();
__ li(a0, Operand(map));
__ Daddu(a0, a0, current_character());
__ Lbu(a0, MemOperand(a0, 0));
BranchOrBacktrack(on_no_match, ne, a0, Operand(zero_reg));
if (mode_ != LATIN1) {
__ bind(&done);
}
return true;
}
case '*':
// Match any character.
return true;
// No custom implementation (yet): s(UC16), S(UC16).
default:
return false;
case StandardCharacterSet::kEverything:
// Match any character.
return true;
}
}

View File

@@ -54,7 +54,8 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS
// Checks whether the given offset from the current position is before
// the end of the string.
virtual void CheckPosition(int cp_offset, Label* on_outside_input);
virtual bool CheckSpecialCharacterClass(base::uc16 type, Label* on_no_match);
virtual bool CheckSpecialCharacterClass(StandardCharacterSet type,
Label* on_no_match);
virtual void Fail();
virtual Handle<HeapObject> GetCode(Handle<String> source);
virtual void GoTo(Label* label);