Add Base64 VLQ decoding support

This CL adds support for Base64 VLQ decoding, which is the basis for
parsing source map files (Version 3). With this support, the mappings
between C/C++ source code and WASM bytecode can be built in the V8 engine.
The newly-added function is called VLQBase64Decode, which accepts the
Base64-encoded VLQ string (pointer and size) and the position of the first
character to be decoded. Upon its return, the position is updated with
the next start position. A unittest for this support is also added in
this CL.

Change-Id: If0f32972ecd7488844478a7b93a0f10cc38b6a5d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1657421
Reviewed-by: Clemens Hammacher <clemensh@chromium.org>
Reviewed-by: Ben Titzer <titzer@chromium.org>
Commit-Queue: Zhiguo Zhou <zhiguo.zhou@intel.com>
Cr-Commit-Position: refs/heads/master@{#62748}
This commit is contained in:
zhiguo 2019-07-16 22:22:44 +08:00 committed by Commit Bot
parent 2547a664ae
commit ed9154168f
5 changed files with 221 additions and 0 deletions

View File

@ -3499,6 +3499,8 @@ v8_component("v8_libbase") {
"src/base/type-traits.h",
"src/base/utils/random-number-generator.cc",
"src/base/utils/random-number-generator.h",
"src/base/vlq-base64.cc",
"src/base/vlq-base64.h",
]
configs = [ ":internal_config_base" ]

58
src/base/vlq-base64.cc Normal file
View File

@ -0,0 +1,58 @@
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <climits>
#include <limits>
#include "src/base/logging.h"
#include "src/base/vlq-base64.h"
namespace v8 {
namespace base {

namespace {

// Maps a 7-bit ASCII code to the value of the corresponding Base64 digit
// (alphabet "A-Z", "a-z", "0-9", '+', '/' -> 0..63). Every character that is
// not part of the alphabet maps to -1.
constexpr int8_t kCharToDigit[] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, 0x3e, -1, -1, -1, 0x3f,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, -1, -1,
    -1, -1, -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12,
    0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, -1, -1, -1, -1, -1,
    -1, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24,
    0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, -1, -1, -1, -1, -1};

// charToDigitDecode() only bounds-checks against 128, so the table has to
// provide exactly that many entries.
static_assert(sizeof(kCharToDigit) / sizeof(kCharToDigit[0]) == 128,
              "kCharToDigit must cover the whole 7-bit ASCII range");

// Each Base64 digit carries 5 payload bits (kDataMask); bit 5 (kContinueMask)
// signals that another digit of the same value follows.
constexpr uint32_t kContinueShift = 5;
constexpr uint32_t kContinueMask = 1 << kContinueShift;
constexpr uint32_t kDataMask = kContinueMask - 1;

// Returns the Base64 digit value (0..63) of |c|, or -1 if |c| is not a Base64
// character. Codes >= 128 are rejected before the table is indexed.
int8_t charToDigitDecode(uint8_t c) { return c < 128u ? kCharToDigit[c] : -1; }

}  // namespace

// Test-only forwarder to the file-local charToDigitDecode().
int8_t charToDigitDecodeForTesting(uint8_t c) { return charToDigitDecode(c); }

// Decodes a single VLQ-Base64 value from the |sz|-character buffer |start|,
// beginning at index |*pos|.
//
// Digits are consumed least-significant group first: each contributes 5
// payload bits, and decoding stops at the first digit without the
// continuation bit. The lowest bit of the assembled number is the sign, the
// remaining bits are the magnitude.
//
// On success, returns the value (within [-2^31+1, 2^31-1]) and leaves |*pos|
// just past the last consumed character. On failure, returns
// std::numeric_limits<int32_t>::min() and leaves |*pos| at the offending
// character (or at |sz| if the input ended mid-value). Failures are: a
// character outside the Base64 alphabet, truncated input, or more payload bits
// than fit into 32 bits.
int32_t VLQBase64Decode(const char* start, size_t sz, size_t* pos) {
  constexpr int32_t kInvalid = std::numeric_limits<int32_t>::min();

  uint32_t res = 0;
  uint64_t shift = 0;
  int32_t digit;

  do {
    if (*pos >= sz) {
      return kInvalid;
    }
    digit = static_cast<int>(charToDigitDecode(start[*pos]));
    // The 7th digit (shift == 30) may only supply the two bits that are still
    // free in |res|. Rejecting anything above bit 1 also rejects a set
    // continuation bit, so |shift| never grows beyond 30 when it is used.
    const bool is_last_byte = (shift + kContinueShift >= 32);
    if (digit == -1 || (is_last_byte && (digit >> 2) != 0)) {
      return kInvalid;
    }
    // The 5-bit groups never overlap, so OR-ing them in is exact.
    res |= (static_cast<uint32_t>(digit) & kDataMask) << shift;
    shift += kContinueShift;
    (*pos)++;
  } while (static_cast<uint32_t>(digit) & kContinueMask);

  // Convert the magnitude to a signed value *before* negating it, so both
  // arms of the conditional are int32_t and no negative value is routed
  // through an unsigned intermediate. The magnitude is at most 2^31-1.
  const int32_t magnitude = static_cast<int32_t>(res >> 1);
  return (res & 1) ? -magnitude : magnitude;
}

}  // namespace base
}  // namespace v8

23
src/base/vlq-base64.h Normal file
View File

@ -0,0 +1,23 @@
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_BASE_VLQ_BASE64_H_
#define V8_BASE_VLQ_BASE64_H_
#include <string>
#include "src/base/base-export.h"
namespace v8 {
namespace base {
// Exposes the file-internal character-to-digit lookup to unit tests. Returns
// the Base64 digit value (0..63) of |c|, or -1 if |c| is not part of the
// Base64 alphabet (A-Z, a-z, 0-9, '+', '/').
V8_BASE_EXPORT int8_t charToDigitDecodeForTesting(uint8_t c);
// Decodes one VLQ-Base64-encoded value from |start|, a buffer of |sz|
// characters, beginning at index |*pos|. On success |*pos| is advanced past
// the consumed characters, so the function can be called repeatedly to decode
// consecutive values. A valid return value is within [-2^31+1, 2^31-1].
// This function returns -2^31 (std::numeric_limits<int32_t>::min()) when bad
// input is passed: a character outside the Base64 alphabet, input that ends in
// the middle of a value, or a value with more bits than fit into 32 bits. In
// that case |*pos| is left at the offending character (or at |sz| if the input
// ran out).
V8_BASE_EXPORT int32_t VLQBase64Decode(const char* start, size_t sz,
size_t* pos);
} // namespace base
} // namespace v8
#endif // V8_BASE_VLQ_BASE64_H_

View File

@ -77,6 +77,7 @@ v8_source_set("unittests_sources") {
"base/template-utils-unittest.cc",
"base/threaded-list-unittest.cc",
"base/utils/random-number-generator-unittest.cc",
"base/vlq-base64-unittest.cc",
"codegen/code-stub-assembler-unittest.cc",
"codegen/code-stub-assembler-unittest.h",
"codegen/register-configuration-unittest.cc",

View File

@ -0,0 +1,137 @@
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <cstring>
#include <initializer_list>
#include <limits>
#include "src/base/vlq-base64.h"
#include "testing/gtest-support.h"
namespace v8 {
namespace base {
// Checks the character-to-digit mapping for every possible byte value: a
// character of the Base64 alphabet must map to its index in the alphabet,
// everything else (including NUL) must map to -1.
TEST(VLQBASE64, charToDigit) {
  const char kAlphabet[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  for (int code = 0; code < 256; ++code) {
    int8_t expected = -1;
    // strchr() would match the terminating NUL for code 0, so skip it.
    if (code != 0) {
      const char* hit = strchr(kAlphabet, static_cast<char>(code));
      if (hit != nullptr) {
        expected = hit - kAlphabet;
      }
    }
    EXPECT_EQ(expected,
              charToDigitDecodeForTesting(static_cast<uint8_t>(code)));
  }
}
// One expected decoding step, used by TestVLQBase64Decode. Instances are
// written as positional brace-initializers ({pos, result}), so the member
// order matters.
struct ExpectedVLQBase64Result {
// Expected value of the position cursor after the VLQBase64Decode call.
size_t pos;
// Expected return value of the VLQBase64Decode call.
int32_t result;
};
void TestVLQBase64Decode(
const char* str,
std::initializer_list<ExpectedVLQBase64Result> expected_results) {
size_t pos = 0;
for (const auto& expect : expected_results) {
int32_t result = VLQBase64Decode(str, strlen(str), &pos);
EXPECT_EQ(expect.result, result);
EXPECT_EQ(expect.pos, pos);
}
}
// Decodes strings that hold (at most) one value. Each expectation is written
// as {position after the call, returned value}; failures are reported as
// std::numeric_limits<int32_t>::min() with the position left at the character
// that could not be consumed.
TEST(VLQBASE64, DecodeOneSegment) {
// Empty input.
TestVLQBase64Decode("", {{0, std::numeric_limits<int32_t>::min()}});
// Unsupported symbol.
TestVLQBase64Decode("*", {{0, std::numeric_limits<int32_t>::min()}});
TestVLQBase64Decode("&", {{0, std::numeric_limits<int32_t>::min()}});
TestVLQBase64Decode("kt:", {{2, std::numeric_limits<int32_t>::min()}});
TestVLQBase64Decode("k^C", {{1, std::numeric_limits<int32_t>::min()}});
// Incomplete string: the last character still has its continuation bit set.
TestVLQBase64Decode("kth4yp", {{6, std::numeric_limits<int32_t>::min()}});
// Interpretable strings.
TestVLQBase64Decode("A", {{1, 0}});
TestVLQBase64Decode("C", {{1, 1}});
TestVLQBase64Decode("Y", {{1, 12}});
TestVLQBase64Decode("2H", {{2, 123}});
TestVLQBase64Decode("ktC", {{3, 1234}});
TestVLQBase64Decode("yjY", {{3, 12345}});
TestVLQBase64Decode("gkxH", {{4, 123456}});
TestVLQBase64Decode("uorrC", {{5, 1234567}});
TestVLQBase64Decode("80wxX", {{5, 12345678}});
TestVLQBase64Decode("qxmvrH", {{6, 123456789}});
TestVLQBase64Decode("kth4ypC", {{7, 1234567890}});
TestVLQBase64Decode("+/////D", {{7, std::numeric_limits<int32_t>::max()}});
// Negative values: same magnitudes as above with the sign bit (lowest bit of
// the first character) set.
TestVLQBase64Decode("D", {{1, -1}});
TestVLQBase64Decode("Z", {{1, -12}});
TestVLQBase64Decode("3H", {{2, -123}});
TestVLQBase64Decode("ltC", {{3, -1234}});
TestVLQBase64Decode("zjY", {{3, -12345}});
TestVLQBase64Decode("hkxH", {{4, -123456}});
TestVLQBase64Decode("vorrC", {{5, -1234567}});
TestVLQBase64Decode("90wxX", {{5, -12345678}});
TestVLQBase64Decode("rxmvrH", {{6, -123456789}});
TestVLQBase64Decode("lth4ypC", {{7, -1234567890}});
TestVLQBase64Decode("//////D", {{7, -std::numeric_limits<int32_t>::max()}});
// Overflowing values are rejected at the seventh character, so the position
// stays at 6.
// An overflowed value 12345678901 (0x2DFDC1C35).
TestVLQBase64Decode("qjuw7/2A", {{6, std::numeric_limits<int32_t>::min()}});
// An overflowed value 123456789012 (0x1CBE991A14).
TestVLQBase64Decode("ohtkz+lH", {{6, std::numeric_limits<int32_t>::min()}});
// An overflowed value 4294967296 (0x100000000).
TestVLQBase64Decode("ggggggE", {{6, std::numeric_limits<int32_t>::min()}});
// An overflowed value -12345678901, |value| = (0x2DFDC1C35).
TestVLQBase64Decode("rjuw7/2A", {{6, std::numeric_limits<int32_t>::min()}});
// An overflowed value -123456789012, |value| = (0x1CBE991A14).
TestVLQBase64Decode("phtkz+lH", {{6, std::numeric_limits<int32_t>::min()}});
// An overflowed value -4294967296, |value| = (0x100000000).
TestVLQBase64Decode("hgggggE", {{6, std::numeric_limits<int32_t>::min()}});
}
// Decodes strings that hold two back-to-back values. The second call starts
// at the position the first call left behind, so each expectation lists the
// cumulative position.
TEST(VLQBASE64, DecodeTwoSegment) {
// Both values non-negative.
TestVLQBase64Decode("AA", {{1, 0}, {2, 0}});
TestVLQBase64Decode("KA", {{1, 5}, {2, 0}});
TestVLQBase64Decode("AQ", {{1, 0}, {2, 8}});
TestVLQBase64Decode("MG", {{1, 6}, {2, 3}});
TestVLQBase64Decode("a4E", {{1, 13}, {3, 76}});
TestVLQBase64Decode("4GyO", {{2, 108}, {4, 233}});
TestVLQBase64Decode("ggEqnD", {{3, 2048}, {6, 1653}});
TestVLQBase64Decode("g2/D0ilF", {{4, 65376}, {8, 84522}});
TestVLQBase64Decode("ss6gBy0m3B", {{5, 537798}, {10, 904521}});
// Both values non-positive.
TestVLQBase64Decode("LA", {{1, -5}, {2, 0}});
TestVLQBase64Decode("AR", {{1, 0}, {2, -8}});
TestVLQBase64Decode("NH", {{1, -6}, {2, -3}});
TestVLQBase64Decode("b5E", {{1, -13}, {3, -76}});
TestVLQBase64Decode("5GzO", {{2, -108}, {4, -233}});
TestVLQBase64Decode("hgErnD", {{3, -2048}, {6, -1653}});
TestVLQBase64Decode("h2/D1ilF", {{4, -65376}, {8, -84522}});
TestVLQBase64Decode("ts6gBz0m3B", {{5, -537798}, {10, -904521}});
// Positive value followed by a negative one.
TestVLQBase64Decode("4GzO", {{2, 108}, {4, -233}});
TestVLQBase64Decode("ggErnD", {{3, 2048}, {6, -1653}});
TestVLQBase64Decode("g2/D1ilF", {{4, 65376}, {8, -84522}});
TestVLQBase64Decode("ss6gBz0m3B", {{5, 537798}, {10, -904521}});
// Negative value followed by a positive one.
TestVLQBase64Decode("5GyO", {{2, -108}, {4, 233}});
TestVLQBase64Decode("hgEqnD", {{3, -2048}, {6, 1653}});
TestVLQBase64Decode("h2/D0ilF", {{4, -65376}, {8, 84522}});
TestVLQBase64Decode("ts6gBy0m3B", {{5, -537798}, {10, 904521}});
}
// Decodes strings that hold four back-to-back values of mixed lengths; each
// expectation lists the cumulative position after the corresponding call.
TEST(VLQBASE64, DecodeFourSegment) {
TestVLQBase64Decode("AAAA", {{1, 0}, {2, 0}, {3, 0}, {4, 0}});
TestVLQBase64Decode("QADA", {{1, 8}, {2, 0}, {3, -1}, {4, 0}});
TestVLQBase64Decode("ECQY", {{1, 2}, {2, 1}, {3, 8}, {4, 12}});
TestVLQBase64Decode("goGguCioPk9I",
{{3, 3200}, {6, 1248}, {9, 7809}, {12, 4562}});
TestVLQBase64Decode("6/BACA", {{3, 1021}, {4, 0}, {5, 1}, {6, 0}});
TestVLQBase64Decode("urCAQA", {{3, 1207}, {4, 0}, {5, 8}, {6, 0}});
TestVLQBase64Decode("sDACA", {{2, 54}, {3, 0}, {4, 1}, {5, 0}});
}
} // namespace base
} // namespace v8