tools: Accept hex representation as binary input (#5870)

Sometimes when debugging or logging, SPIR-V may be dumped as a stream of
hex values.  There are tools to convert such a stream to binary
(such as [1]) but they create an inconvenient extra step when for
example the disassembly of that hex stream is needed.

[1]: https://www.khronos.org/spir/visualizer/hexdump.html

In this change, the binary reader used by the tools is enhanced to
detect when the binary is actually a hex stream, and parse that instead.
The following formats are accepted, detected based on how the SPIR-V
magic number is output:

=== Words

If the first token of the hex stream is one of 0x07230203, 0x7230203,
x07230203, or x7230203, the hex stream is expected to consist of 32-bit
hex words prefixed with 0x or x.  For example:

    0x7230203, 0x10400, 0x180001, 0x79, 0x0

is parsed as:

    0x07230203 0x00010400 0x00180001 0x00000079 0x00000000

Note that `,` is optional in the stream, but the hex values are expected
to be delimited by either `,` or whitespace.

=== Bytes With Prefix

If the first token of the hex stream is one of 0x07, 0x7, x07, x7, 0x03,
0x3, x03, or x3, the hex stream is expected to consist of 8-bit hex
bytes prefixed with 0x or x.  If the first token has a value of 7, the
stream is big-endian.  Otherwise it's little-endian.  For example:

    0x3, 0x2, 0x23, 0x7, 0x0, 0x4, 0x1, 0x0, 0x1, 0x0, 0x18, 0x0, 0x79, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0

is parsed as:

    0x07230203 0x00010400 0x00180001 0x00000079 0x00000000

Similar to "Words", `,` is optional in the stream, but the hex values
are expected to be delimited by either `,` or whitespace.

=== Bytes Without Prefix

If the first two characters of the hex stream are 07 or 03, the hex
stream is expected to consist of 8-bit hex bytes of 2 characters each.
If the first token is 07, the stream is big-endian.  Otherwise it's
little-endian.  Unlike the other modes, the delimiter is optional (which
automatically handles 32-bit word streams), but no 0-padding is done.
For example, all of the following:

    03, 02, 23, 07, 00, 04, 01, 00, 01, 00, 18, 00, 79, 00, 00, 00, 00, 00, 00, 00
    03 02 23 07 00 04 01 00 01 00 18 00 79 00 00 00 00 00 00 00
    03022307 00040100 01001800 79000000 00000000
    07,23,02,03,00,01,04,00,00,18,00,01,00,00,00,79,00,00,00,00
    07230203, 00010400, 00180001, 00000079, 00000000

are parsed as:

    0x07230203 0x00010400 0x00180001 0x00000079 0x00000000
This commit is contained in:
Shahbaz Youssefi 2024-11-04 09:57:37 -05:00 committed by GitHub
parent d426fc53bb
commit 02433568af
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 777 additions and 17 deletions

View File

@ -447,6 +447,7 @@ cc_library(
"tools_util",
":spirv_tools_internal",
":test_lib",
":tools_io",
"@googletest//:gtest",
"@googletest//:gtest_main",
],

View File

@ -1377,6 +1377,7 @@ if (build_with_chromium && spvtools_build_executables) {
"test/fix_word_test.cpp",
"test/generator_magic_number_test.cpp",
"test/hex_float_test.cpp",
"test/hex_to_text_test.cpp",
"test/immediate_int_test.cpp",
"test/libspirv_macros_test.cpp",
"test/name_mapper_test.cpp",
@ -1424,6 +1425,7 @@ if (build_with_chromium && spvtools_build_executables) {
":spvtools_language_header_cldebuginfo100",
":spvtools_language_header_debuginfo",
":spvtools_language_header_vkdebuginfo100",
":spvtools_tools_io",
":spvtools_val",
"//testing/gmock",
"//testing/gtest",

View File

@ -80,6 +80,8 @@ further notice.
* Assembler only does basic syntax checking. No cross validation of
IDs or types is performed, except to check literal arguments to
`OpConstant`, `OpSpecConstant`, and `OpSwitch`.
* Where tools expect binary input, a hex stream may be provided instead. See
`spirv-dis --help`.
See [`docs/syntax.md`](docs/syntax.md) for the assembly language syntax.

View File

@ -88,6 +88,7 @@ endfunction()
set(TEST_SOURCES
test_fixture.h
unit_spirv.h
${spirv-tools_SOURCE_DIR}/tools/io.h
assembly_context_test.cpp
assembly_format_test.cpp
@ -110,6 +111,7 @@ set(TEST_SOURCES
fix_word_test.cpp
generator_magic_number_test.cpp
hex_float_test.cpp
hex_to_text_test.cpp
immediate_int_test.cpp
libspirv_macros_test.cpp
named_id_test.cpp
@ -154,6 +156,7 @@ set(TEST_SOURCES
to_string_test.cpp
unit_spirv.cpp
${spirv-tools_SOURCE_DIR}/tools/io.cpp
)
spvtools_pch(TEST_SOURCES pch_test)

View File

@ -20,7 +20,6 @@
#include "source/opt/ir_context.h"
#include "source/spirv_constant.h"
#include "spirv-tools/libspirv.hpp"
#include "tools/io.h"
#include "tools/util/cli_consumer.h"
#include <fstream>

View File

@ -18,7 +18,6 @@
#include "source/opt/ir_context.h"
#include "spirv-tools/libspirv.hpp"
#include "tools/io.h"
#include "tools/util/cli_consumer.h"
#include "gtest/gtest.h"

429
test/hex_to_text_test.cpp Normal file
View File

@ -0,0 +1,429 @@
// Copyright (c) 2024 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <vector>
#include "gmock/gmock.h"
#include "test/test_fixture.h"
#include "tools/io.h"
namespace spvtools {
namespace {
using spvtest::ScopedContext;
// Test fixture with helpers that run a textual hex stream through
// |ConvertHexToBinary| and check the outcome.
class HexToText : public ::testing::Test {
 public:
  // Converts |hex_stream| to a SPIR-V binary, disassembles the result and
  // expects the text to be exactly |expected_disassembly|.
  void VerifyDisassembly(const char* hex_stream,
                         const char* expected_disassembly) {
    std::vector<char> stream(hex_stream, hex_stream + strlen(hex_stream));
    std::vector<uint32_t> binary;

    // Convert hex to binary first.  There is nothing meaningful to
    // disassemble if the conversion failed, so stop right after reporting it.
    const bool converted = ConvertHexToBinary(stream, &binary);
    EXPECT_TRUE(converted);
    if (!converted) {
      return;
    }

    // Then disassemble it.
    spv_diagnostic diagnostic = nullptr;
    spv_text disassembly = nullptr;
    EXPECT_EQ(spvBinaryToText(ScopedContext().context, binary.data(),
                              binary.size(), SPV_BINARY_TO_TEXT_OPTION_NONE,
                              &disassembly, &diagnostic),
              SPV_SUCCESS);
    EXPECT_EQ(diagnostic, nullptr);

    // Verify disassembly is as expected.  |disassembly| is still null if the
    // call above failed (already reported by the expectation on its result),
    // so it must not be dereferenced in that case.
    if (disassembly != nullptr) {
      EXPECT_STREQ(disassembly->str, expected_disassembly);
    }

    // Clean up.
    spvDiagnosticDestroy(diagnostic);
    spvTextDestroy(disassembly);
  }

  // Expects |hex_stream| to be rejected by |ConvertHexToBinary|.
  void EnsureError(const char* hex_stream) {
    std::vector<char> stream(hex_stream, hex_stream + strlen(hex_stream));
    std::vector<uint32_t> binary;

    // Make sure there is a parse error
    EXPECT_FALSE(ConvertHexToBinary(stream, &binary));
  }
};
// The actual assembly doesn't matter, just the hex parsing. All the tests use
// the following SPIR-V.
constexpr char kDisassembly[] = R"(; SPIR-V
; Version: 1.6
; Generator: Khronos SPIR-V Tools Assembler; 0
; Bound: 11
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint Vertex %1 "main" %2 %3
OpName %2 "fancy_attribute"
OpName %3 "useful_output"
OpDecorate %2 Location 4
OpDecorate %3 Location 2
%4 = OpTypeFloat 32
%5 = OpTypePointer Input %4
%2 = OpVariable %5 Input
%6 = OpTypePointer Output %4
%3 = OpVariable %6 Output
%7 = OpTypeVoid
%8 = OpTypeFunction %7
%1 = OpFunction %7 None %8
%9 = OpLabel
%10 = OpLoad %4 %2
OpStore %3 %10
OpReturn
OpFunctionEnd
)";
TEST_F(HexToText, Words) {
constexpr char kHex[] = R"(0x07230203, 0x00010600, 0x00070000, 0x0000000b
0x00000000, 0x00020011, 0x00000001, 0x0003000e
0x00000000, 0x00000001, 0x0007000f, 0x00000000
0x00000001, 0x6e69616d, 0x00000000, 0x00000002
0x00000003, 0x00060005, 0x00000002, 0x636e6166
0x74615f79, 0x62697274, 0x00657475, 0x00060005
0x00000003, 0x66657375, 0x6f5f6c75, 0x75707475
0x00000074, 0x00040047, 0x00000002, 0x0000001e
0x00000004, 0x00040047, 0x00000003, 0x0000001e
0x00000002, 0x00030016, 0x00000004, 0x00000020
0x00040020, 0x00000005, 0x00000001, 0x00000004
0x0004003b, 0x00000005, 0x00000002, 0x00000001
0x00040020, 0x00000006, 0x00000003, 0x00000004
0x0004003b, 0x00000006, 0x00000003, 0x00000003
0x00020013, 0x00000007, 0x00030021, 0x00000008
0x00000007, 0x00050036, 0x00000007, 0x00000001
0x00000000, 0x00000008, 0x000200f8, 0x00000009
0x0004003d, 0x00000004, 0x0000000a, 0x00000002
0x0003003e, 0x00000003, 0x0000000a, 0x000100fd
0x00010038)";
VerifyDisassembly(kHex, kDisassembly);
}
TEST_F(HexToText, WordsLeadingSpace) {
constexpr char kHex[] = R"(
x07230203, x00010600, x00070000, x0000000b
x00000000, x00020011, x00000001, x0003000e
x00000000, x00000001, x0007000f, x00000000
x00000001, x6e69616d, x00000000, x00000002
x00000003, x00060005, x00000002, x636e6166
x74615f79, x62697274, x00657475, x00060005
x00000003, x66657375, x6f5f6c75, x75707475
x00000074, x00040047, x00000002, x0000001e
x00000004, x00040047, x00000003, x0000001e
x00000002, x00030016, x00000004, x00000020
x00040020, x00000005, x00000001, x00000004
x0004003b, x00000005, x00000002, x00000001
x00040020, x00000006, x00000003, x00000004
x0004003b, x00000006, x00000003, x00000003
x00020013, x00000007, x00030021, x00000008
x00000007, x00050036, x00000007, x00000001
x00000000, x00000008, x000200f8, x00000009
x0004003d, x00000004, x0000000a, x00000002
x0003003e, x00000003, x0000000a, x000100fd
x00010038)";
VerifyDisassembly(kHex, kDisassembly);
}
TEST_F(HexToText, WordsTrailingSpace) {
constexpr char kHex[] = R"(0X7230203, 0X10600, 0X70000, 0XB
0X0, 0X20011, 0X1, 0X3000E
0X0, 0X1, 0X7000F, 0X0
0X1, X6E69616D, 0X0, 0X2
0X3, 0X60005, 0X2, X636E6166
X74615F79, X62697274, 0X657475, 0X60005
0X3, X66657375, X6F5F6C75, X75707475
0X74, 0X40047, 0X2, 0X1E
0X4, 0X40047, 0X3, 0X1E
0X2, 0X30016, 0X4, 0X20
0X40020, 0X5, 0X1, 0X4
0X4003B, 0X5, 0X2, 0X1
0X40020, 0X6, 0X3, 0X4
0X4003B, 0X6, 0X3, 0X3
0X20013, 0X7, 0X30021, 0X8
0X7, 0X50036, 0X7, 0X1
0X0, 0X8, 0X200F8, 0X9
0X4003D, 0X4, 0XA, 0X2
0X3003E, 0X3, 0XA, 0X100FD
0X10038
)";
VerifyDisassembly(kHex, kDisassembly);
}
TEST_F(HexToText, BytesLittleEndian) {
constexpr char kHex[] = R"(
0x03 0x02 0x23 0x07 0x00 0x06 0x01 0x00 0x00 0x00 0x07 0x00 0x0b 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x11 0x00 0x02 0x00 0x01 0x00 0x00 0x00 0x0e 0x00 0x03 0x00
0x00 0x00 0x00 0x00 0x01 0x00 0x00 0x00 0x0f 0x00 0x07 0x00 0x00 0x00 0x00 0x00
0x01 0x00 0x00 0x00 0x6d 0x61 0x69 0x6e 0x00 0x00 0x00 0x00 0x02 0x00 0x00 0x00
0x03 0x00 0x00 0x00 0x05 0x00 0x06 0x00 0x02 0x00 0x00 0x00 0x66 0x61 0x6e 0x63
0x79 0x5f 0x61 0x74 0x74 0x72 0x69 0x62 0x75 0x74 0x65 0x00 0x05 0x00 0x06 0x00
0x03 0x00 0x00 0x00 0x75 0x73 0x65 0x66 0x75 0x6c 0x5f 0x6f 0x75 0x74 0x70 0x75
0x74 0x00 0x00 0x00 0x47 0x00 0x04 0x00 0x02 0x00 0x00 0x00 0x1e 0x00 0x00 0x00
0x04 0x00 0x00 0x00 0x47 0x00 0x04 0x00 0x03 0x00 0x00 0x00 0x1e 0x00 0x00 0x00
0x02 0x00 0x00 0x00 0x16 0x00 0x03 0x00 0x04 0x00 0x00 0x00 0x20 0x00 0x00 0x00
0x20 0x00 0x04 0x00 0x05 0x00 0x00 0x00 0x01 0x00 0x00 0x00 0x04 0x00 0x00 0x00
0x3b 0x00 0x04 0x00 0x05 0x00 0x00 0x00 0x02 0x00 0x00 0x00 0x01 0x00 0x00 0x00
0x20 0x00 0x04 0x00 0x06 0x00 0x00 0x00 0x03 0x00 0x00 0x00 0x04 0x00 0x00 0x00
0x3b 0x00 0x04 0x00 0x06 0x00 0x00 0x00 0x03 0x00 0x00 0x00 0x03 0x00 0x00 0x00
0x13 0x00 0x02 0x00 0x07 0x00 0x00 0x00 0x21 0x00 0x03 0x00 0x08 0x00 0x00 0x00
0x07 0x00 0x00 0x00 0x36 0x00 0x05 0x00 0x07 0x00 0x00 0x00 0x01 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x08 0x00 0x00 0x00 0xf8 0x00 0x02 0x00 0x09 0x00 0x00 0x00
0x3d 0x00 0x04 0x00 0x04 0x00 0x00 0x00 0x0a 0x00 0x00 0x00 0x02 0x00 0x00 0x00
0x3e 0x00 0x03 0x00 0x03 0x00 0x00 0x00 0x0a 0x00 0x00 0x00 0xfd 0x00 0x01 0x00
0x38 0x00 0x01 0x00
)";
VerifyDisassembly(kHex, kDisassembly);
}
TEST_F(HexToText, BytesBigEndian) {
constexpr char kHex[] = R"(
X07,X23,X02,X03, X00,X01,X06,X00, X00,X07,X00,X00, X00,X00,X00,X0B
X00,X00,X00,X00, X00,X02,X00,X11, X00,X00,X00,X01, X00,X03,X00,X0E
X00,X00,X00,X00, X00,X00,X00,X01, X00,X07,X00,X0F, X00,X00,X00,X00
X00,X00,X00,X01, X6E,X69,X61,X6D, X00,X00,X00,X00, X00,X00,X00,X02
X00,X00,X00,X03, X00,X06,X00,X05, X00,X00,X00,X02, X63,X6E,X61,X66
X74,X61,X5F,X79, X62,X69,X72,X74, X00,X65,X74,X75, X00,X06,X00,X05
X00,X00,X00,X03, X66,X65,X73,X75, X6F,X5F,X6C,X75, X75,X70,X74,X75
X00,X00,X00,X74, X00,X04,X00,X47, X00,X00,X00,X02, X00,X00,X00,X1E
X00,X00,X00,X04, X00,X04,X00,X47, X00,X00,X00,X03, X00,X00,X00,X1E
X00,X00,X00,X02, X00,X03,X00,X16, X00,X00,X00,X04, X00,X00,X00,X20
X00,X04,X00,X20, X00,X00,X00,X05, X00,X00,X00,X01, X00,X00,X00,X04
X00,X04,X00,X3B, X00,X00,X00,X05, X00,X00,X00,X02, X00,X00,X00,X01
X00,X04,X00,X20, X00,X00,X00,X06, X00,X00,X00,X03, X00,X00,X00,X04
X00,X04,X00,X3B, X00,X00,X00,X06, X00,X00,X00,X03, X00,X00,X00,X03
X00,X02,X00,X13, X00,X00,X00,X07, X00,X03,X00,X21, X00,X00,X00,X08
X00,X00,X00,X07, X00,X05,X00,X36, X00,X00,X00,X07, X00,X00,X00,X01
X00,X00,X00,X00, X00,X00,X00,X08, X00,X02,X00,XF8, X00,X00,X00,X09
X00,X04,X00,X3D, X00,X00,X00,X04, X00,X00,X00,X0A, X00,X00,X00,X02
X00,X03,X00,X3E, X00,X00,X00,X03, X00,X00,X00,X0A, X00,X01,X00,XFD
X00,X01,X00,X38,
)";
VerifyDisassembly(kHex, kDisassembly);
}
TEST_F(HexToText, StreamLittleEndian) {
constexpr char kHex[] = R"(
03 02 23 07 00 06 01 00 00 00 07 00 0b 00 00 00
00 00 00 00 11 00 02 00 01 00 00 00 0e 00 03 00
00 00 00 00 01 00 00 00 0f 00 07 00 00 00 00 00
01 00 00 00 6d 61 69 6e 00 00 00 00 02 00 00 00
03 00 00 00 05 00 06 00 02 00 00 00 66 61 6e 63
79 5f 61 74 74 72 69 62 75 74 65 00 05 00 06 00
03 00 00 00 75 73 65 66 75 6c 5f 6f 75 74 70 75
74 00 00 00 47 00 04 00 02 00 00 00 1e 00 00 00
04 00 00 00 47 00 04 00 03 00 00 00 1e 00 00 00
02 00 00 00 16 00 03 00 04 00 00 00 20 00 00 00
20 00 04 00 05 00 00 00 01 00 00 00 04 00 00 00
3b 00 04 00 05 00 00 00 02 00 00 00 01 00 00 00
20 00 04 00 06 00 00 00 03 00 00 00 04 00 00 00
3b 00 04 00 06 00 00 00 03 00 00 00 03 00 00 00
13 00 02 00 07 00 00 00 21 00 03 00 08 00 00 00
07 00 00 00 36 00 05 00 07 00 00 00 01 00 00 00
00 00 00 00 08 00 00 00 f8 00 02 00 09 00 00 00
3d 00 04 00 04 00 00 00 0a 00 00 00 02 00 00 00
3e 00 03 00 03 00 00 00 0a 00 00 00 fd 00 01 00
38 00 01 00
)";
VerifyDisassembly(kHex, kDisassembly);
}
TEST_F(HexToText, StreamLittleEndianNoDelim) {
constexpr char kHex[] = R"(
0302230700060100000007000B000000
0000000011000200010000000E000300
00000000010000000F00070000000000
010000006D61696E0000000002000000
03000000050006000200000066616E63
795F6174747269627574650005000600
0300000075736566756C5F6F75747075
7400000047000400020000001E000000
0400000047000400030000001E000000
02000000160003000400000020000000
20000400050000000100000004000000
3B000400050000000200000001000000
20000400060000000300000004000000
3B000400060000000300000003000000
13000200070000002100030008000000
07000000360005000700000001000000
0000000008000000F800020009000000
3D000400040000000A00000002000000
3E000300030000000A000000FD000100
38000100
)";
VerifyDisassembly(kHex, kDisassembly);
}
TEST_F(HexToText, StreamBigEndian) {
constexpr char kHex[] = R"(
07230203, 00010600, 00070000, 0000000b
00000000, 00020011, 00000001, 0003000e
00000000, 00000001, 0007000f, 00000000
00000001, 6e69616d, 00000000, 00000002
00000003, 00060005, 00000002, 636e6166
74615f79, 62697274, 00657475, 00060005
00000003, 66657375, 6f5f6c75, 75707475
00000074, 00040047, 00000002, 0000001e
00000004, 00040047, 00000003, 0000001e
00000002, 00030016, 00000004, 00000020
00040020, 00000005, 00000001, 00000004
0004003b, 00000005, 00000002, 00000001
00040020, 00000006, 00000003, 00000004
0004003b, 00000006, 00000003, 00000003
00020013, 00000007, 00030021, 00000008
00000007, 00050036, 00000007, 00000001
00000000, 00000008, 000200f8, 00000009
0004003d, 00000004, 0000000a, 00000002
0003003e, 00000003, 0000000a, 000100fd
00010038,
)";
VerifyDisassembly(kHex, kDisassembly);
}
TEST_F(HexToText, WordsNoDelimieter) {
// Prefixed words must be separated by whitespace or a comma.  In the seventh
// row of this stream two words are fused together (0x666573750x6f5f6c75), so
// the conversion is expected to fail.
constexpr char kHex[] = R"(0x07230203 0x00010600 0x00070000 0x0000000b
0x00000000 0x00020011 0x00000001 0x0003000e
0x00000000 0x00000001 0x0007000f 0x00000000
0x00000001 0x6e69616d 0x00000000 0x00000002
0x00000003 0x00060005 0x00000002 0x636e6166
0x74615f79 0x62697274 0x00657475 0x00060005
0x00000003 0x666573750x6f5f6c75 0x75707475
0x00000074 0x00040047 0x00000002 0x0000001e
0x00000004 0x00040047 0x00000003 0x0000001e
0x00000002 0x00030016 0x00000004 0x00000020
0x00040020 0x00000005 0x00000001 0x00000004
0x0004003b 0x00000005 0x00000002 0x00000001
0x00040020 0x00000006 0x00000003 0x00000004
0x0004003b 0x00000006 0x00000003 0x00000003
0x00020013 0x00000007 0x00030021 0x00000008
0x00000007 0x00050036 0x00000007 0x00000001
0x00000000 0x00000008 0x000200f8 0x00000009
0x0004003d 0x00000004 0x0000000a 0x00000002
0x0003003e 0x00000003 0x0000000a 0x000100fd
0x00010038)";
EnsureError(kHex);
}
TEST_F(HexToText, InvalidFirstToken) {
// The stream format is detected from how the SPIR-V magic number is written.
// Here the first word is 0x17230203 rather than 0x07230203, so no format is
// recognized and the conversion is expected to fail.
constexpr char kHex[] = R"(0x17230203, 0x00010600, 0x00070000, 0x0000000b
0x00000000, 0x00020011, 0x00000001, 0x0003000e
0x00000000, 0x00000001, 0x0007000f, 0x00000000
0x00000001, 0x6e69616d, 0x00000000, 0x00000002
0x00000003, 0x00060005, 0x00000002, 0x636e6166
0x74615f79, 0x62697274, 0x00657475, 0x00060005
0x00000003, 0x66657375, 0x6f5f6c75, 0x75707475
0x00000074, 0x00040047, 0x00000002, 0x0000001e
0x00000004, 0x00040047, 0x00000003, 0x0000001e
0x00000002, 0x00030016, 0x00000004, 0x00000020
0x00040020, 0x00000005, 0x00000001, 0x00000004
0x0004003b, 0x00000005, 0x00000002, 0x00000001
0x00040020, 0x00000006, 0x00000003, 0x00000004
0x0004003b, 0x00000006, 0x00000003, 0x00000003
0x00020013, 0x00000007, 0x00030021, 0x00000008
0x00000007, 0x00050036, 0x00000007, 0x00000001
0x00000000, 0x00000008, 0x000200f8, 0x00000009
0x0004003d, 0x00000004, 0x0000000a, 0x00000002
0x0003003e, 0x00000003, 0x0000000a, 0x000100fd
0x00010038)";
EnsureError(kHex);
}
TEST_F(HexToText, NonHexCharacter) {
// Note: a 6 is replaced with G in this stream
constexpr char kHex[] = R"(0x07230203, 0x00010600, 0x00070000, 0x0000000b
0x00000000, 0x00020011, 0x00000001, 0x0003000e
0x00000000, 0x00000001, 0x0007000f, 0x00000000
0x00000001, 0x6e69616d, 0x00000000, 0x00000002
0x00000003, 0x00060005, 0x00000002, 0x636e6166
0x74615f79, 0x62697274, 0x00657475, 0x00060005
0x00000003, 0x66657375, 0x6f5f6c75, 0x75707475
0x00000074, 0x00040047, 0x00000002, 0x0000001e
0x00000004, 0x00040047, 0x00000003, 0x0000001e
0x00000002, 0x0003001G, 0x00000004, 0x00000020
0x00040020, 0x00000005, 0x00000001, 0x00000004
0x0004003b, 0x00000005, 0x00000002, 0x00000001
0x00040020, 0x00000006, 0x00000003, 0x00000004
0x0004003b, 0x00000006, 0x00000003, 0x00000003
0x00020013, 0x00000007, 0x00030021, 0x00000008
0x00000007, 0x00050036, 0x00000007, 0x00000001
0x00000000, 0x00000008, 0x000200f8, 0x00000009
0x0004003d, 0x00000004, 0x0000000a, 0x00000002
0x0003003e, 0x00000003, 0x0000000a, 0x000100fd
0x00010038)";
EnsureError(kHex);
}
TEST_F(HexToText, MissingExpectedPrefix) {
// The magic number is written with a 0x prefix, so every following word must
// carry a prefix too.  The third value in the eleventh row (00000001) has
// none, so the conversion is expected to fail.
constexpr char kHex[] = R"(0x07230203, 0x00010600, 0x00070000, 0x0000000b
0x00000000, 0x00020011, 0x00000001, 0x0003000e
0x00000000, 0x00000001, 0x0007000f, 0x00000000
0x00000001, 0x6e69616d, 0x00000000, 0x00000002
0x00000003, 0x00060005, 0x00000002, 0x636e6166
0x74615f79, 0x62697274, 0x00657475, 0x00060005
0x00000003, 0x66657375, 0x6f5f6c75, 0x75707475
0x00000074, 0x00040047, 0x00000002, 0x0000001e
0x00000004, 0x00040047, 0x00000003, 0x0000001e
0x00000002, 0x00030016, 0x00000004, 0x00000020
0x00040020, 0x00000005, 00000001, 0x00000004
0x0004003b, 0x00000005, 0x00000002, 0x00000001
0x00040020, 0x00000006, 0x00000003, 0x00000004
0x0004003b, 0x00000006, 0x00000003, 0x00000003
0x00020013, 0x00000007, 0x00030021, 0x00000008
0x00000007, 0x00050036, 0x00000007, 0x00000001
0x00000000, 0x00000008, 0x000200f8, 0x00000009
0x0004003d, 0x00000004, 0x0000000a, 0x00000002
0x0003003e, 0x00000003, 0x0000000a, 0x000100fd
0x00010038)";
EnsureError(kHex);
}
TEST_F(HexToText, UnexpectedPrefix) {
// The magic number is written without a prefix, so the stream is read as
// bare pairs of hex digits.  The third value in the eleventh row
// (0x00000001) carries a prefix, whose 'x' is not a hex digit, so the
// conversion is expected to fail.
constexpr char kHex[] = R"(07230203, 00010600, 00070000, 0000000b
00000000, 00020011, 00000001, 0003000e
00000000, 00000001, 0007000f, 00000000
00000001, 6e69616d, 00000000, 00000002
00000003, 00060005, 00000002, 636e6166
74615f79, 62697274, 00657475, 00060005
00000003, 66657375, 6f5f6c75, 75707475
00000074, 00040047, 00000002, 0000001e
00000004, 00040047, 00000003, 0000001e
00000002, 00030016, 00000004, 00000020
00040020, 00000005, 0x00000001, 00000004
0004003b, 00000005, 00000002, 00000001
00040020, 00000006, 00000003, 00000004
0004003b, 00000006, 00000003, 00000003
00020013, 00000007, 00030021, 00000008
00000007, 00050036, 00000007, 00000001
00000000, 00000008, 000200f8, 00000009
0004003d, 00000004, 0000000a, 00000002
0003003e, 00000003, 0000000a, 000100fd
00010038)";
EnsureError(kHex);
}
} // namespace
} // namespace spvtools

View File

@ -33,6 +33,10 @@ Usage: %s [options] [<filename>]
The SPIR-V binary is read from <filename>. If no file is specified,
or if the filename is "-", then the binary is read from standard input.
A text-based hex stream is also accepted as binary input, which should either
consist of 32-bit words or 8-bit bytes. The 0x or x prefix is optional, but
should be consistently present in the stream.
Options:
-h, --help Print this help.

View File

@ -15,6 +15,8 @@
#include "io.h"
#include <assert.h>
#include <ctype.h>
#include <stdlib.h>
#if defined(SPIRV_WINDOWS)
#include <fcntl.h>
@ -40,7 +42,7 @@ template <typename T>
void ReadFile(FILE* file, std::vector<T>* data) {
if (file == nullptr) return;
const int buf_size = 1024;
const int buf_size = 4096 / sizeof(T);
T buf[buf_size];
while (size_t len = fread(buf, sizeof(T), buf_size, file)) {
data->insert(data->end(), buf, buf + len);
@ -48,9 +50,7 @@ void ReadFile(FILE* file, std::vector<T>* data) {
}
// Returns true if |file| has encountered an error opening the file or reading
// the file as a series of element of type |T|. If there was an error, writes an
// error message to standard error.
template <class T>
// from it. If there was an error, writes an error message to standard error.
bool WasFileCorrectlyRead(FILE* file, const char* filename) {
if (file == nullptr) {
fprintf(stderr, "error: file does not exist '%s'\n", filename);
@ -62,17 +62,291 @@ bool WasFileCorrectlyRead(FILE* file, const char* filename) {
fprintf(stderr, "error: error reading file '%s'\n", filename);
return false;
}
} else {
if (sizeof(T) != 1 && (ftell(file) % sizeof(T))) {
fprintf(
stderr,
"error: file size should be a multiple of %zd; file '%s' corrupt\n",
sizeof(T), filename);
return false;
}
}
return true;
}
// Ensure the file contained an exact number of elements, whose size is given in
// |alignment|.
//
// |filename| is only used in the diagnostic.  |read_size| is the number of
// bytes that were read.  Returns true if |read_size| is a multiple of
// |alignment|; otherwise writes an error message to standard error and returns
// false.
bool WasFileSizeAligned(const char* filename, size_t read_size,
                        size_t alignment) {
  // An alignment of 1 makes the check pointless and an alignment of 0 would
  // divide by zero below; neither is a valid element size here.
  assert(alignment > 1);
  if ((read_size % alignment) != 0) {
    // |alignment| is a size_t, which is printed with %zu (%zd is for the
    // corresponding signed type).
    fprintf(stderr,
            "error: file size should be a multiple of %zu; file '%s' corrupt\n",
            alignment, filename);
    return false;
  }
  return true;
}
// Different formats the hex is expected to be in.  The format of a stream is
// chosen from the way the SPIR-V magic number (0x07230203) is written at its
// start.
enum class HexMode {
// Each token is a 32-bit word with a 0x or x prefix: 0x07230203, ...
Words,
// Each token is a prefixed byte, most significant byte of a word first:
// 0x07, 0x23, 0x02, 0x03, ...
BytesBigEndian,
// Each token is a prefixed byte, least significant byte of a word first:
// 0x03, 0x02, 0x23, 0x07, ...
BytesLittleEndian,
// Bytes are unprefixed pairs of hex digits, most significant byte first:
// 07 23 02 03 ...
StreamBigEndian,
// Bytes are unprefixed pairs of hex digits, least significant byte first:
// 03 02 23 07 ...
StreamLittleEndian,
};
// Whether a character should be skipped as whitespace / separator /
// end-of-file.
bool IsSpace(char c) {
  // The <ctype.h> classifiers only accept values representable as unsigned
  // char (or EOF).  |c| may come from raw binary input, where bytes >= 0x80
  // are negative if char is signed, so convert before classifying.
  return isspace(static_cast<unsigned char>(c)) || c == ',' || c == '\0';
}
// Returns true if |stream| looks like a textual hex stream rather than a
// SPIR-V binary.  Only the first character that is not skippable (see
// |IsSpace|) is inspected; an empty or all-skippable stream is not a hex
// stream.
bool IsHexStream(const std::vector<char>& stream) {
  // Find the first character that carries information.
  size_t pos = 0;
  while (pos < stream.size() && IsSpace(stream[pos])) {
    ++pos;
  }
  if (pos == stream.size()) {
    return false;
  }

  // Every possible case of a SPIR-V hex stream starts with either '0' or 'x'
  // (see |HexMode| values), so that single character decides.
  const char first = stream[pos];
  return first == '0' || first == 'x' || first == 'X';
}
// Returns true if the first |len| characters of |token| and |expect| are equal
// when compared case-insensitively.  Both buffers must hold at least |len|
// characters; neither needs to be NUL-terminated.
bool MatchIgnoreCase(const char* token, const char* expect, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    // tolower() only accepts values representable as unsigned char (or EOF);
    // |token| may hold arbitrary bytes, which can be negative as plain char.
    const int token_char = tolower(static_cast<unsigned char>(token[i]));
    const int expect_char = tolower(static_cast<unsigned char>(expect[i]));
    if (token_char != expect_char) {
      return false;
    }
  }
  return true;
}
// Helper class to tokenize a hex stream and convert it to 32-bit words.
//
// The constructor inspects the first token of the stream to decide which
// |HexMode| the stream is written in; |Parse| then converts the whole stream.
// Diagnostics are written to standard error, and only the first parse error is
// reported.
class HexTokenizer {
 public:
  // |filename| is only used in diagnostics.  |stream| is held by reference and
  // must outlive the tokenizer.  Parsed words are appended to |data|.
  HexTokenizer(const char* filename, const std::vector<char>& stream,
               std::vector<uint32_t>* data)
      : filename_(filename), stream_(stream), data_(data) {
    DetermineMode();
  }

  // Converts the stream, appending one element to |data| per word.  Returns
  // false if the format was not recognized or a parse error was encountered.
  bool Parse() {
    while (current_ < stream_.size() && !encountered_error_) {
      data_->push_back(GetNextWord());

      // Make sure trailing space does not lead to parse error by skipping it
      // and exiting the loop.
      SkipSpace();
    }

    return !encountered_error_;
  }

 private:
  // Records a parse error and reports it, unless one was already recorded.
  void ParseError(const char* reason) {
    if (!encountered_error_) {
      fprintf(stderr,
              "error: hex stream parse error at character %zu: %s in '%s'\n",
              current_, reason, filename_);
      encountered_error_ = true;
    }
  }

  // Skip whitespace until the next non-whitespace non-comma character.
  void SkipSpace() {
    while (current_ < stream_.size()) {
      char c = stream_[current_];
      if (!IsSpace(c)) {
        return;
      }

      ++current_;
    }
  }

  // Skip the 0x or x at the beginning of a hex value.
  void Skip0x() {
    // The first character must be 0 or x.
    const char first = Next();
    if (first != '0' && first != 'x' && first != 'X') {
      ParseError("expected 0x or x");
    } else if (first == '0') {
      const char second = Next();
      if (second != 'x' && second != 'X') {
        ParseError("expected 0x");
      }
    }
  }

  // Consume the next character.  Returns '\0' once the stream is exhausted.
  char Next() { return current_ < stream_.size() ? stream_[current_++] : '\0'; }

  // Determine how to read the hex stream based on the first token.
  void DetermineMode() {
    SkipSpace();

    // Read 11 bytes, that is the size of the biggest token (10) + one more.
    char first_token[11];
    for (uint32_t i = 0; i < 11; ++i) {
      first_token[i] = Next();
    }

    // Table of how to match the first token with a mode.  Longer patterns come
    // first, and patterns that require a delimiter only match if the character
    // right after them is one.
    struct {
      const char* expect;
      bool must_have_delimiter;
      HexMode mode;
    } parse_info[] = {
        {"0x07230203", true, HexMode::Words},
        {"0x7230203", true, HexMode::Words},
        {"x07230203", true, HexMode::Words},
        {"x7230203", true, HexMode::Words},

        {"0x07", true, HexMode::BytesBigEndian},
        {"0x7", true, HexMode::BytesBigEndian},
        {"x07", true, HexMode::BytesBigEndian},
        {"x7", true, HexMode::BytesBigEndian},

        {"0x03", true, HexMode::BytesLittleEndian},
        {"0x3", true, HexMode::BytesLittleEndian},
        {"x03", true, HexMode::BytesLittleEndian},
        {"x3", true, HexMode::BytesLittleEndian},

        {"07", false, HexMode::StreamBigEndian},
        {"03", false, HexMode::StreamLittleEndian},
    };

    // Check to see if any of the possible first tokens are matched.  If not,
    // this is not a recognized hex stream.
    encountered_error_ = true;
    for (const auto& info : parse_info) {
      const size_t expect_len = strlen(info.expect);
      const bool matches_expect =
          MatchIgnoreCase(first_token, info.expect, expect_len);
      const bool satisfies_delimiter =
          !info.must_have_delimiter || IsSpace(first_token[expect_len]);
      if (matches_expect && satisfies_delimiter) {
        mode_ = info.mode;
        encountered_error_ = false;
        break;
      }
    }

    if (encountered_error_) {
      // |first_token| is not NUL-terminated; the precision bounds the read.
      fprintf(stderr,
              "error: hex format detected, but pattern '%.11s' is not "
              "recognized '%s'\n",
              first_token, filename_);
    }

    // Reset the position to restart parsing with the determined mode.
    current_ = 0;
  }

  // Consume up to |max_len| hex digits followed by a delimiter, and put the
  // digits in |token_chars|.  The resulting string is NUL-terminated.  It is a
  // parse error if there are no digits at all, if a non-hex character is
  // found, or if no delimiter follows within |max_len| digits.
  void NextN(char token_chars[9], size_t max_len) {
    assert(max_len < 9);

    for (size_t i = 0; i <= max_len; ++i) {
      char c = Next();
      if (IsSpace(c)) {
        // A prefix that is not followed by any digit (e.g. a bare "0x") is not
        // a value; do not silently treat it as 0.
        if (i == 0) {
          ParseError("expected hex digits");
        }
        token_chars[i] = '\0';
        return;
      }

      token_chars[i] = c;
      // isxdigit() only accepts values representable as unsigned char.
      if (!isxdigit(static_cast<unsigned char>(c))) {
        ParseError("encountered non-hex character");
      }
    }

    // If space is not reached before the maximum number of characters were
    // consumed, that's an error.
    ParseError("expected delimiter (space or comma)");
    token_chars[max_len] = '\0';
  }

  // Consume one hex digit.
  char NextHexDigit() {
    char c = Next();
    // isxdigit() only accepts values representable as unsigned char.
    if (!isxdigit(static_cast<unsigned char>(c))) {
      ParseError("encountered non-hex character");
    }
    return c;
  }

  // Extract a token out of the stream.  It could be either a word or a byte,
  // based on |mode_|.  Returns 0 if a parse error has been encountered.
  uint32_t GetNextToken() {
    SkipSpace();

    // The longest token can be 8 chars (for |HexMode::Words|), add one for
    // '\0'.
    char token_chars[9];

    switch (mode_) {
      case HexMode::Words:
      case HexMode::BytesBigEndian:
      case HexMode::BytesLittleEndian:
        // Start with 0x, followed by up to 8 (for Word) or 2 (for Byte*)
        // digits.
        Skip0x();
        NextN(token_chars, mode_ == HexMode::Words ? 8 : 2);
        break;
      case HexMode::StreamBigEndian:
      case HexMode::StreamLittleEndian:
        // Always expected to see two consecutive hex digits.
        token_chars[0] = NextHexDigit();
        token_chars[1] = NextHexDigit();
        token_chars[2] = '\0';
        break;
    }

    if (encountered_error_) {
      return 0;
    }

    // Parse the hex value that was just read.  The token holds at most 8 hex
    // digits, so it always fits in unsigned long.  The signed strtol() must
    // not be used: where long is 32 bits it saturates at LONG_MAX for any word
    // of 0x80000000 or above.
    return static_cast<uint32_t>(strtoul(token_chars, nullptr, 16));
  }

  // Construct a word out of tokens: a single token in |HexMode::Words|,
  // otherwise four byte tokens combined according to the stream's endianness.
  uint32_t GetNextWord() {
    if (mode_ == HexMode::Words) {
      return GetNextToken();
    }

    // Elements of a braced initializer list are evaluated in order, so the
    // bytes are read in stream order.
    uint32_t tokens[4] = {
        GetNextToken(),
        GetNextToken(),
        GetNextToken(),
        GetNextToken(),
    };

    switch (mode_) {
      case HexMode::BytesBigEndian:
      case HexMode::StreamBigEndian:
        return tokens[0] << 24 | tokens[1] << 16 | tokens[2] << 8 | tokens[3];
      case HexMode::BytesLittleEndian:
      case HexMode::StreamLittleEndian:
        return tokens[3] << 24 | tokens[2] << 16 | tokens[1] << 8 | tokens[0];
      default:
        assert(false);
        return 0;
    }
  }

  // Name of the input, used in diagnostics only.
  const char* filename_;
  // The characters being parsed (not owned).
  const std::vector<char>& stream_;
  // Where parsed words are appended (not owned).
  std::vector<uint32_t>* data_;

  // Format of the stream, as detected by |DetermineMode|.
  HexMode mode_ = HexMode::Words;
  // Index in |stream_| of the next character to consume.
  size_t current_ = 0;
  // Set once the format is not recognized or a parse error is encountered.
  bool encountered_error_ = false;
};
} // namespace
bool ReadBinaryFile(const char* filename, std::vector<uint32_t>* data) {
@ -87,12 +361,39 @@ bool ReadBinaryFile(const char* filename, std::vector<uint32_t>* data) {
fp = stdin;
}
ReadFile(fp, data);
bool succeeded = WasFileCorrectlyRead<uint32_t>(fp, filename);
// Read into a char vector first. If this is a hex stream, it needs to be
// processed as such.
std::vector<char> data_raw;
ReadFile(fp, &data_raw);
bool succeeded = WasFileCorrectlyRead(fp, filename);
if (use_file && fp) fclose(fp);
if (!succeeded) {
return false;
}
if (IsHexStream(data_raw)) {
// If a hex stream, parse it and fill |data|.
HexTokenizer tokenizer(filename, data_raw, data);
succeeded = tokenizer.Parse();
} else {
// If not a hex stream, convert it to uint32_t via memcpy.
succeeded = WasFileSizeAligned(filename, data_raw.size(), sizeof(uint32_t));
if (succeeded) {
data->resize(data_raw.size() / sizeof(uint32_t), 0);
memcpy(data->data(), data_raw.data(), data_raw.size());
}
}
return succeeded;
}
// Runs the hex tokenizer over the in-memory |stream| and appends the resulting
// words to |data|.  Diagnostics name the input "<input string>".  Returns false
// if the stream could not be parsed.
bool ConvertHexToBinary(const std::vector<char>& stream,
                        std::vector<uint32_t>* data) {
  return HexTokenizer("<input string>", stream, data).Parse();
}
bool ReadTextFile(const char* filename, std::vector<char>* data) {
assert(data->empty());
@ -106,7 +407,7 @@ bool ReadTextFile(const char* filename, std::vector<char>* data) {
}
ReadFile(fp, data);
bool succeeded = WasFileCorrectlyRead<char>(fp, filename);
bool succeeded = WasFileCorrectlyRead(fp, filename);
if (use_file && fp) fclose(fp);
return succeeded;
}

View File

@ -25,8 +25,28 @@
// file. If |filename| is nullptr or "-", reads from the standard input, but
// reopened as a binary file. If any error occurs, writes error messages to
// standard error and returns false.
//
// If the given input is detected to be in ascii hex, it is converted to binary
// automatically. In that case, the shape of the input data is determined based
// on the representation of the magic number:
//
// * "[0]x[0]7230203": Every following "0x..." represents a word.
// * "[0]x[0]7[,] [0]x23...": Every following "0x..." represents a byte, stored
// in big-endian order
// * "[0]x[0]3[,] [0]x[0]2...": Every following "0x..." represents a byte,
// stored in little-endian order
// * "07[, ]23...": Every following "XY" represents a byte, stored in
// big-endian order
// * "03[, ]02...": Every following "XY" represents a byte, stored in
// little-endian order
bool ReadBinaryFile(const char* filename, std::vector<uint32_t>* data);
// The hex->binary logic of |ReadBinaryFile| applied to a pre-loaded stream of
// bytes. Used by tests to avoid having to call |ReadBinaryFile| with temp
// files. Returns false in case of parse errors.
bool ConvertHexToBinary(const std::vector<char>& stream,
std::vector<uint32_t>* data);
// Sets the contents of the file named |filename| in |data|, assuming each
// element in the file is of type |char|. The file is opened as a text file. If
// |filename| is nullptr or "-", reads from the standard input, but reopened as