[runtime] Implement encodeURI as single runtime function.
Rewrite encodeURI as runtime function. We will probably repackage runtime_URIEncode as a C++ builtin. BUG=v8:4912 R=yangguo@chromium.org Review-Url: https://codereview.chromium.org/1968953002 Cr-Commit-Position: refs/heads/master@{#36257}
This commit is contained in:
parent
29611a952b
commit
6502a1bfb3
@ -539,13 +539,6 @@ static int SizeInHexChars(S number) {
|
||||
}
|
||||
|
||||
|
||||
// Maps a nibble value in [0, 15] to its uppercase hexadecimal digit
// ('0'-'9' for 0-9, 'A'-'F' for 10-15).
static char HexCharOfValue(int value) {
  // The upper bound is exclusive: 16 is not a hex digit value and would
  // otherwise be turned into 'G'.
  DCHECK(0 <= value && value < 16);
  if (value < 10) return static_cast<char>(value + '0');
  return static_cast<char>(value - 10 + 'A');
}
|
||||
|
||||
|
||||
bool Bignum::ToHexString(char* buffer, int buffer_size) const {
|
||||
DCHECK(IsClamped());
|
||||
// Each bigit must be printable as separate hex-character.
|
||||
|
@ -560,6 +560,11 @@ class Factory final {
|
||||
return NewRangeError(MessageTemplate::kInvalidStringLength);
|
||||
}
|
||||
|
||||
// Builds an error object from the isolate's URI error constructor using the
// kURIMalformed message template.
Handle<Object> NewURIError() {
  Handle<JSFunction> uri_error_constructor = isolate()->uri_error_function();
  return NewError(uri_error_constructor, MessageTemplate::kURIMalformed);
}
|
||||
|
||||
Handle<Object> NewError(Handle<JSFunction> constructor,
|
||||
MessageTemplate::Template template_index,
|
||||
Handle<Object> arg0 = Handle<Object>(),
|
||||
|
139
src/js/uri.js
139
src/js/uri.js
@ -37,72 +37,6 @@ function HexValueOf(code) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Reports whether the char code denotes an ASCII letter or decimal digit.
function isAlphaNumeric(cc) {
  var isLowerCase = cc >= 97 && cc <= 122;  // a - z
  var isUpperCase = cc >= 65 && cc <= 90;   // A - Z
  var isDigit = cc >= 48 && cc <= 57;       // 0 - 9
  return isLowerCase || isUpperCase || isDigit;
}
|
||||
|
||||
// Lazily initialized.
|
||||
var hexCharCodeArray = 0;
|
||||
|
||||
// Writes the percent-encoded form of `octet` into `result` starting at
// `index`: the char code of '%' followed by the entries of hexCharCodeArray
// for the high and low nibble. Returns the index just past the three
// written slots.
function URIAddEncodedOctetToBuffer(octet, result, index) {
  var highNibble = octet >> 4;
  var lowNibble = octet & 0x0F;
  result[index] = 37;  // Char code of '%'.
  result[index + 1] = hexCharCodeArray[highNibble];
  result[index + 2] = hexCharCodeArray[lowNibble];
  return index + 3;
}
|
||||
|
||||
// Percent-encodes up to four octets into `result` starting at `index` and
// returns the next free index. The first octet is always emitted; each of
// the following three is emitted only when its slot holds a truthy value.
function URIEncodeOctets(octets, result, index) {
  // Build the hex digit table (char codes of '0'-'9' and 'A'-'F') on first
  // use.
  if (hexCharCodeArray === 0) {
    hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
                        65, 66, 67, 68, 69, 70];
  }
  var next = URIAddEncodedOctetToBuffer(octets[0], result, index);
  for (var i = 1; i <= 3; i++) {
    if (octets[i]) next = URIAddEncodedOctetToBuffer(octets[i], result, next);
  }
  return next;
}
|
||||
|
||||
// Splits a single 16-bit char code into one, two or three octets (depending
// on whether it is at most 0x7F, at most 0x7FF, or larger) and appends their
// percent-encoded form to `result` at `index`. Returns the next free index.
function URIEncodeSingle(cc, result, index) {
  var octets = new InternalArray(3);
  if (cc <= 0x007F) {
    octets[0] = cc;
  } else {
    var lowSix = cc & 63;
    var midSix = (cc >> 6) & 63;
    if (cc <= 0x07FF) {
      octets[0] = midSix + 192;
      octets[1] = lowSix + 128;
    } else {
      var topFour = (cc >> 12) & 0xF;
      octets[0] = topFour + 224;
      octets[1] = midSix + 128;
      octets[2] = lowSix + 128;
    }
  }
  return URIEncodeOctets(octets, result, index);
}
|
||||
|
||||
// Combines the two char codes of a surrogate pair into four octets and
// appends their percent-encoded form to `result` at `index`. Returns the
// next free index.
function URIEncodePair(cc1 , cc2, result, index) {
  // Bit fields taken from the first char code.
  var highBits = ((cc1 >> 6) & 0xF) + 1;
  var midBits = (cc1 >> 2) & 0xF;
  var lowPair = cc1 & 3;
  // Bit fields taken from the second char code.
  var trailHigh = (cc2 >> 6) & 0xF;
  var trailLow = cc2 & 63;

  var octets = new InternalArray(4);
  octets[0] = 240 + (highBits >> 2);
  octets[1] = 128 + (((highBits & 3) << 4) | midBits);
  octets[2] = 128 + ((lowPair << 4) | trailHigh);
  octets[3] = 128 + trailLow;
  return URIEncodeOctets(octets, result, index);
}
|
||||
|
||||
function URIHexCharsToCharCode(highChar, lowChar) {
|
||||
var highCode = HexValueOf(highChar);
|
||||
var lowCode = HexValueOf(lowChar);
|
||||
@ -167,37 +101,6 @@ function URIDecodeOctets(octets, result, index) {
|
||||
return index;
|
||||
}
|
||||
|
||||
// ECMA-262, section 15.1.3
|
||||
function Encode(uri, unescape) {
|
||||
uri = TO_STRING(uri);
|
||||
var uriLength = uri.length;
|
||||
var array = new InternalArray(uriLength);
|
||||
var index = 0;
|
||||
for (var k = 0; k < uriLength; k++) {
|
||||
var cc1 = %_StringCharCodeAt(uri, k);
|
||||
if (unescape(cc1)) {
|
||||
array[index++] = cc1;
|
||||
} else {
|
||||
if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw MakeURIError();
|
||||
if (cc1 < 0xD800 || cc1 > 0xDBFF) {
|
||||
index = URIEncodeSingle(cc1, array, index);
|
||||
} else {
|
||||
k++;
|
||||
if (k == uriLength) throw MakeURIError();
|
||||
var cc2 = %_StringCharCodeAt(uri, k);
|
||||
if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw MakeURIError();
|
||||
index = URIEncodePair(cc1, cc2, array, index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var result = %NewString(array.length, NEW_ONE_BYTE_STRING);
|
||||
for (var i = 0; i < array.length; i++) {
|
||||
%_OneByteSeqStringSetChar(i, array[i], result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// ECMA-262, section 15.1.3
|
||||
function Decode(uri, reserved) {
|
||||
uri = TO_STRING(uri);
|
||||
@ -318,48 +221,14 @@ function URIDecodeComponent(component) {
|
||||
|
||||
// ECMA-262 - 15.1.3.3.
|
||||
function URIEncode(uri) {
|
||||
var unescapePredicate = function(cc) {
|
||||
if (isAlphaNumeric(cc)) return true;
|
||||
// !
|
||||
if (cc == 33) return true;
|
||||
// #$
|
||||
if (35 <= cc && cc <= 36) return true;
|
||||
// &'()*+,-./
|
||||
if (38 <= cc && cc <= 47) return true;
|
||||
// :;
|
||||
if (58 <= cc && cc <= 59) return true;
|
||||
// =
|
||||
if (cc == 61) return true;
|
||||
// ?@
|
||||
if (63 <= cc && cc <= 64) return true;
|
||||
// _
|
||||
if (cc == 95) return true;
|
||||
// ~
|
||||
if (cc == 126) return true;
|
||||
|
||||
return false;
|
||||
};
|
||||
return Encode(uri, unescapePredicate);
|
||||
uri = TO_STRING(uri);
|
||||
return %URIEncode(uri, true);
|
||||
}
|
||||
|
||||
// ECMA-262 - 15.1.3.4
|
||||
function URIEncodeComponent(component) {
|
||||
var unescapePredicate = function(cc) {
|
||||
if (isAlphaNumeric(cc)) return true;
|
||||
// !
|
||||
if (cc == 33) return true;
|
||||
// '()*
|
||||
if (39 <= cc && cc <= 42) return true;
|
||||
// -.
|
||||
if (45 <= cc && cc <= 46) return true;
|
||||
// _
|
||||
if (cc == 95) return true;
|
||||
// ~
|
||||
if (cc == 126) return true;
|
||||
|
||||
return false;
|
||||
};
|
||||
return Encode(component, unescapePredicate);
|
||||
component = TO_STRING(component);
|
||||
return %URIEncode(component, false);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "src/runtime/runtime-utils.h"
|
||||
|
||||
#include "src/arguments.h"
|
||||
#include "src/char-predicates-inl.h"
|
||||
#include "src/regexp/jsregexp-inl.h"
|
||||
#include "src/string-builder.h"
|
||||
#include "src/string-search.h"
|
||||
@ -1151,6 +1152,132 @@ RUNTIME_FUNCTION(Runtime_NewString) {
|
||||
return *result;
|
||||
}
|
||||
|
||||
// anonymous namespace for URIEncode helper functions
|
||||
namespace {
|
||||
|
||||
// Returns true for characters accepted by IsAlphaNumeric and for the marks
// ! ' ( ) * - . _ ~
bool IsUnescapePredicateInUriComponent(uc16 c) {
  if (IsAlphaNumeric(c)) return true;

  return c == '!' || c == '\'' || c == '(' || c == ')' || c == '*' ||
         c == '-' || c == '.' || c == '_' || c == '~';
}
|
||||
|
||||
// Returns true when the character is one of # : ; / ? $ & + , @ =
bool IsUriSeparator(uc16 c) {
  static const char kSeparators[] = "#:;/?$&+,@=";
  // Stop at the terminating NUL so that c == 0 is never reported as a match.
  for (const char* candidate = kSeparators; *candidate != '\0'; ++candidate) {
    if (c == *candidate) return true;
  }
  return false;
}
|
||||
|
||||
// Appends '%' followed by the two hex digit characters (high nibble first)
// of `octet` to `buffer`.
void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) {
  const int high_nibble = octet >> 4;
  const int low_nibble = octet & 0x0F;
  buffer->Add('%');
  buffer->Add(HexCharOfValue(high_nibble));
  buffer->Add(HexCharOfValue(low_nibble));
}
|
||||
|
||||
// Splits a single 16-bit code unit into one, two or three octets (depending
// on whether it is at most 0x7F, at most 0x7FF, or larger) and appends each
// octet to `buffer` in percent-encoded form.
void EncodeSingle(uc16 c, List<uint8_t>* buffer) {
  if (c <= 0x007F) {
    AddHexEncodedToBuffer(c, buffer);
    return;
  }

  const uint8_t low_six = c & 63;
  const uint8_t mid_six = (c >> 6) & 63;
  if (c <= 0x07FF) {
    AddHexEncodedToBuffer(mid_six + 192, buffer);
    AddHexEncodedToBuffer(low_six + 128, buffer);
    return;
  }

  const uint8_t top_four = (c >> 12) & 0xF;
  AddHexEncodedToBuffer(top_four + 224, buffer);
  AddHexEncodedToBuffer(mid_six + 128, buffer);
  AddHexEncodedToBuffer(low_six + 128, buffer);
}
|
||||
|
||||
// Combines the two code units of a surrogate pair into four octets and
// appends each octet to `buffer` in percent-encoded form.
void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) {
  // Bit fields taken from the first code unit.
  const uint8_t high_bits = ((cc1 >> 6) & 0xF) + 1;
  const uint8_t mid_bits = (cc1 >> 2) & 0xF;
  const uint8_t low_pair = cc1 & 3;
  // Bit fields taken from the second code unit.
  const uint8_t trail_high = (cc2 >> 6) & 0xF;
  const uint8_t trail_low = cc2 & 63;

  AddHexEncodedToBuffer((high_bits >> 2) + 240, buffer);
  AddHexEncodedToBuffer((((high_bits & 3) << 4) | mid_bits) + 128, buffer);
  AddHexEncodedToBuffer(((low_pair << 4) | trail_high) + 128, buffer);
  AddHexEncodedToBuffer(trail_low + 128, buffer);
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
// Runtime function that percent-encodes a string.
//   args[0]: the string to encode.
//   args[1]: boolean; when true, characters accepted by IsUriSeparator are
//            copied unescaped in addition to those accepted by
//            IsUnescapePredicateInUriComponent.
// Every other code unit is emitted through EncodeSingle, or EncodePair for a
// lead surrogate followed by a trail surrogate. A lead surrogate without a
// following trail surrogate, or a trail surrogate on its own, raises the
// error created by NewURIError. The result is a new one-byte string.
RUNTIME_FUNCTION(Runtime_URIEncode) {
  HandleScope scope(isolate);
  DCHECK_EQ(2, args.length());
  CONVERT_ARG_HANDLE_CHECKED(String, uri, 0);
  CONVERT_BOOLEAN_ARG_CHECKED(is_uri, 1);

  uri = String::Flatten(uri);
  int uri_length = uri->length();
  List<uint8_t> buffer(uri_length);

  {
    DisallowHeapAllocation no_gc;
    String::FlatContent uri_content = uri->GetFlatContent();

    for (int k = 0; k < uri_length; k++) {
      uc16 cc1 = uri_content.Get(k);
      if (unibrow::Utf16::IsLeadSurrogate(cc1)) {
        k++;
        if (k < uri_length) {
          // Read the second half through the flat content as well, instead
          // of going back through the string handle.
          uc16 cc2 = uri_content.Get(k);
          if (unibrow::Utf16::IsTrailSurrogate(cc2)) {
            EncodePair(cc1, cc2, &buffer);
            continue;
          }
        }
      } else if (!unibrow::Utf16::IsTrailSurrogate(cc1)) {
        if (IsUnescapePredicateInUriComponent(cc1) ||
            (is_uri && IsUriSeparator(cc1))) {
          buffer.Add(cc1);
        } else {
          EncodeSingle(cc1, &buffer);
        }
        continue;
      }

      // Only reached for malformed surrogate sequences. Creating the error
      // object allocates, so allocation is re-enabled before throwing.
      AllowHeapAllocation allocate_error_and_return;
      THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewURIError());
    }
  }

  Handle<String> result;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, result,
      isolate->factory()->NewStringFromOneByte(buffer.ToConstVector()));
  return *result;
}
|
||||
|
||||
RUNTIME_FUNCTION(Runtime_StringLessThan) {
|
||||
HandleScope handle_scope(isolate);
|
||||
DCHECK_EQ(2, args.length());
|
||||
|
@ -836,6 +836,7 @@ namespace internal {
|
||||
F(StringTrim, 3, 1) \
|
||||
F(TruncateString, 2, 1) \
|
||||
F(NewString, 2, 1) \
|
||||
F(URIEncode, 2, 1) \
|
||||
F(StringLessThan, 2, 1) \
|
||||
F(StringLessThanOrEqual, 2, 1) \
|
||||
F(StringGreaterThan, 2, 1) \
|
||||
|
@ -37,6 +37,11 @@ inline int HexValue(uc32 c) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Maps a nibble value in [0, 15] to its uppercase hexadecimal digit
// ('0'-'9' for 0-9, 'A'-'F' for 10-15).
inline char HexCharOfValue(int value) {
  // The upper bound is exclusive: 16 is not a hex digit value and would
  // otherwise be turned into 'G'.
  DCHECK(0 <= value && value < 16);
  if (value < 10) return static_cast<char>(value + '0');
  return static_cast<char>(value - 10 + 'A');
}
|
||||
|
||||
// Converts a boolean to 1 (true) or 0 (false).
inline int BoolToInt(bool b) {
  if (b) return 1;
  return 0;
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user