tools: Accept hex representation as binary input (#5870)

Sometimes when debugging or logging, SPIR-V may be dumped as a stream of hex values. There are tools to convert such a stream to binary (such as [1]) but they create an inconvenient extra step when for example the disassembly of that hex stream is needed. [1]: https://www.khronos.org/spir/visualizer/hexdump.html In this change, the binary reader used by the tools is enhanced to detect when the binary is actually a hex stream, and parse that instead. The following formats are accepted, detected based on how the SPIR-V magic number is output: === Words If the first token of the hex stream is one of 0x07230203, 0x7230203, x07230203, or x7230203, the hex stream is expected to consist of 32-bit hex words prefixed with 0x or x. For example: 0x7230203, 0x10400, 0x180001, 0x79, 0x0 is parsed as: 0x07230203 0x00010400 0x00180001 0x00000079 0x00000000 Note that `,` is optional in the stream, but the hex values are expected to be delimited by either `,` or whitespace. === Bytes With Prefix If the first token of the hex stream is one of 0x07, 0x7, x07, x7, 0x03, 0x3, x03, or x3, the hex stream is expected to consist of 8-bit hex bytes prefixed with 0x or x. If the first token has a value of 7, the stream is big-endian. Otherwise it's little-endian. For example: 0x3, 0x2, 0x23, 0x7, 0x0, 0x4, 0x1, 0x0, 0x1, 0x0, 0x18, 0x0, 0x79, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 is parsed as: 0x07230203 0x00010400 0x00180001 0x00000079 0x00000000 Similar to "Words", `,` is optional in the stream, but the hex values are expected to be delimited by either `,` or whitespace. === Bytes Without Prefix If the first two characters of the hex stream is 07, or 03, the hex stream is expected to consist of 8-bit hex bytes of 2 characters each. If the first token is 07, the stream is big-endian. Otherwise it's little-endian. Unlike the other modes, delimiter is optional (which automatically handles 32-bit word streams), but no 0-padding is done. 
For example, all of the following: 03, 02, 23, 07, 00, 04, 01, 00, 01, 00, 18, 00, 79, 00, 00, 00, 00, 00, 00, 00 03 02 23 07 00 04 01 00 01 00 18 00 79 00 00 00 00 00 00 00 03022307 00040100 01001800 79000000 00000000 07,23,02,03,00,01,04,00,00,18,00,01,00,00,00,79,00,00,00,00 07230203, 00010400, 00180001, 00000079, 00000000 are parsed as: 0x07230203 0x00010400 0x00180001 0x00000079 0x00000000
2024-12-03 16:01:03 +00:00 · 2024-11-04 09:57:37 -05:00 · 2024-11-04 09:57:37 -05:00 · 02433568af
commit 02433568af
parent d426fc53bb
10 changed files with 777 additions and 17 deletions
--- a/BUILD.bazel
+++ b/BUILD.bazel
@ -447,6 +447,7 @@ cc_library(
        "tools_util",
        ":spirv_tools_internal",
        ":test_lib",
+        ":tools_io",
        "@googletest//:gtest",
        "@googletest//:gtest_main",
    ],
--- a/BUILD.gn
+++ b/BUILD.gn
@ -1377,6 +1377,7 @@ if (build_with_chromium && spvtools_build_executables) {
      "test/fix_word_test.cpp",
      "test/generator_magic_number_test.cpp",
      "test/hex_float_test.cpp",
+      "test/hex_to_text_test.cpp",
      "test/immediate_int_test.cpp",
      "test/libspirv_macros_test.cpp",
      "test/name_mapper_test.cpp",
@ -1424,6 +1425,7 @@ if (build_with_chromium && spvtools_build_executables) {
      ":spvtools_language_header_cldebuginfo100",
      ":spvtools_language_header_debuginfo",
      ":spvtools_language_header_vkdebuginfo100",
+      ":spvtools_tools_io",
      ":spvtools_val",
      "//testing/gmock",
      "//testing/gtest",
--- a/README.md
+++ b/README.md
@ -80,6 +80,8 @@ further notice.
 * Assembler only does basic syntax checking.  No cross validation of
  IDs or types is performed, except to check literal arguments to
  `OpConstant`, `OpSpecConstant`, and `OpSwitch`.
+* Where tools expect binary input, a hex stream may be provided instead.  See
+  `spirv-dis --help`.

 See [`docs/syntax.md`](docs/syntax.md) for the assembly language syntax.

--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@ -88,6 +88,7 @@ endfunction()
 set(TEST_SOURCES
  test_fixture.h
  unit_spirv.h
+  ${spirv-tools_SOURCE_DIR}/tools/io.h

  assembly_context_test.cpp
  assembly_format_test.cpp
@ -110,6 +111,7 @@ set(TEST_SOURCES
  fix_word_test.cpp
  generator_magic_number_test.cpp
  hex_float_test.cpp
+  hex_to_text_test.cpp
  immediate_int_test.cpp
  libspirv_macros_test.cpp
  named_id_test.cpp
@ -154,6 +156,7 @@ set(TEST_SOURCES
  to_string_test.cpp

  unit_spirv.cpp
+  ${spirv-tools_SOURCE_DIR}/tools/io.cpp
 )

 spvtools_pch(TEST_SOURCES pch_test)
--- a/test/diff/diff_test.cpp
+++ b/test/diff/diff_test.cpp
@ -20,7 +20,6 @@
 #include "source/opt/ir_context.h"
 #include "source/spirv_constant.h"
 #include "spirv-tools/libspirv.hpp"
-#include "tools/io.h"
 #include "tools/util/cli_consumer.h"

 #include <fstream>
--- a/test/diff/diff_test_utils.cpp
+++ b/test/diff/diff_test_utils.cpp
@ -18,7 +18,6 @@
 #include "source/opt/ir_context.h"

 #include "spirv-tools/libspirv.hpp"
-#include "tools/io.h"
 #include "tools/util/cli_consumer.h"

 #include "gtest/gtest.h"
--- a/test/hex_to_text_test.cpp
+++ b/test/hex_to_text_test.cpp
@ -0,0 +1,429 @@
+// Copyright (c) 2024 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "test/test_fixture.h"
+#include "tools/io.h"
+
+namespace spvtools {
+namespace {
+
+using spvtest::ScopedContext;
+
+// Test fixture with helpers that run a hex stream through
+// |ConvertHexToBinary| and check the outcome.
+class HexToText : public ::testing::Test {
+ public:
+  // Converts |hex_stream| to binary, disassembles the result and expects the
+  // produced text to be equal to |expected_disassembly|.
+  void VerifyDisassembly(const char* hex_stream,
+                         const char* expected_disassembly) {
+    std::vector<char> stream(hex_stream, hex_stream + strlen(hex_stream));
+    std::vector<uint32_t> binary;
+
+    // Convert hex to binary first.
+    EXPECT_TRUE(ConvertHexToBinary(stream, &binary));
+
+    // Then disassemble it.
+    spv_diagnostic diagnostic = nullptr;
+    spv_text disassembly = nullptr;
+    EXPECT_EQ(spvBinaryToText(ScopedContext().context, binary.data(),
+                              binary.size(), SPV_BINARY_TO_TEXT_OPTION_NONE,
+                              &disassembly, &diagnostic),
+              SPV_SUCCESS);
+    EXPECT_EQ(diagnostic, nullptr);
+
+    // Verify disassembly is as expected and clean up.  |disassembly| is still
+    // null if no text was produced; report that as a test failure instead of
+    // dereferencing a null pointer.
+    if (disassembly != nullptr) {
+      EXPECT_STREQ(disassembly->str, expected_disassembly);
+      spvTextDestroy(disassembly);
+    } else {
+      ADD_FAILURE() << "no disassembly was produced";
+    }
+
+    spvDiagnosticDestroy(diagnostic);
+  }
+
+  // Expects the conversion of |hex_stream| to binary to be rejected.
+  void EnsureError(const char* hex_stream) {
+    std::vector<char> stream(hex_stream, hex_stream + strlen(hex_stream));
+    std::vector<uint32_t> binary;
+
+    // Make sure there is a parse error
+    EXPECT_FALSE(ConvertHexToBinary(stream, &binary));
+  }
+};
+
+// The actual assembly doesn't matter, just the hex parsing.  All the tests use
+// the following SPIR-V.
+constexpr char kDisassembly[] = R"(; SPIR-V
+; Version: 1.6
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 11
+; Schema: 0
+OpCapability Shader
+OpMemoryModel Logical GLSL450
+OpEntryPoint Vertex %1 "main" %2 %3
+OpName %2 "fancy_attribute"
+OpName %3 "useful_output"
+OpDecorate %2 Location 4
+OpDecorate %3 Location 2
+%4 = OpTypeFloat 32
+%5 = OpTypePointer Input %4
+%2 = OpVariable %5 Input
+%6 = OpTypePointer Output %4
+%3 = OpVariable %6 Output
+%7 = OpTypeVoid
+%8 = OpTypeFunction %7
+%1 = OpFunction %7 None %8
+%9 = OpLabel
+%10 = OpLoad %4 %2
+OpStore %3 %10
+OpReturn
+OpFunctionEnd
+)";
+
+TEST_F(HexToText, Words) {
+  constexpr char kHex[] = R"(0x07230203, 0x00010600, 0x00070000, 0x0000000b
+0x00000000, 0x00020011, 0x00000001, 0x0003000e
+0x00000000, 0x00000001, 0x0007000f, 0x00000000
+0x00000001, 0x6e69616d, 0x00000000, 0x00000002
+0x00000003, 0x00060005, 0x00000002, 0x636e6166
+0x74615f79, 0x62697274, 0x00657475, 0x00060005
+0x00000003, 0x66657375, 0x6f5f6c75, 0x75707475
+0x00000074, 0x00040047, 0x00000002, 0x0000001e
+0x00000004, 0x00040047, 0x00000003, 0x0000001e
+0x00000002, 0x00030016, 0x00000004, 0x00000020
+0x00040020, 0x00000005, 0x00000001, 0x00000004
+0x0004003b, 0x00000005, 0x00000002, 0x00000001
+0x00040020, 0x00000006, 0x00000003, 0x00000004
+0x0004003b, 0x00000006, 0x00000003, 0x00000003
+0x00020013, 0x00000007, 0x00030021, 0x00000008
+0x00000007, 0x00050036, 0x00000007, 0x00000001
+0x00000000, 0x00000008, 0x000200f8, 0x00000009
+0x0004003d, 0x00000004, 0x0000000a, 0x00000002
+0x0003003e, 0x00000003, 0x0000000a, 0x000100fd
+0x00010038)";
+
+  VerifyDisassembly(kHex, kDisassembly);
+}
+
+TEST_F(HexToText, WordsLeadingSpace) {
+  constexpr char kHex[] = R"(    
+x07230203, x00010600, x00070000, x0000000b
+x00000000, x00020011, x00000001, x0003000e
+x00000000, x00000001, x0007000f, x00000000
+x00000001, x6e69616d, x00000000, x00000002
+x00000003, x00060005, x00000002, x636e6166
+x74615f79, x62697274, x00657475, x00060005
+x00000003, x66657375, x6f5f6c75, x75707475
+x00000074, x00040047, x00000002, x0000001e
+x00000004, x00040047, x00000003, x0000001e
+x00000002, x00030016, x00000004, x00000020
+x00040020, x00000005, x00000001, x00000004
+x0004003b, x00000005, x00000002, x00000001
+x00040020, x00000006, x00000003, x00000004
+x0004003b, x00000006, x00000003, x00000003
+x00020013, x00000007, x00030021, x00000008
+x00000007, x00050036, x00000007, x00000001
+x00000000, x00000008, x000200f8, x00000009
+x0004003d, x00000004, x0000000a, x00000002
+x0003003e, x00000003, x0000000a, x000100fd
+x00010038)";
+
+  VerifyDisassembly(kHex, kDisassembly);
+}
+
+TEST_F(HexToText, WordsTrailingSpace) {
+  constexpr char kHex[] = R"(0X7230203, 0X10600, 0X70000, 0XB
+0X0, 0X20011, 0X1, 0X3000E
+0X0, 0X1, 0X7000F, 0X0
+0X1, X6E69616D, 0X0, 0X2
+0X3, 0X60005, 0X2, X636E6166
+X74615F79, X62697274, 0X657475, 0X60005
+0X3, X66657375, X6F5F6C75, X75707475
+0X74, 0X40047, 0X2, 0X1E
+0X4, 0X40047, 0X3, 0X1E
+0X2, 0X30016, 0X4, 0X20
+0X40020, 0X5, 0X1, 0X4
+0X4003B, 0X5, 0X2, 0X1
+0X40020, 0X6, 0X3, 0X4
+0X4003B, 0X6, 0X3, 0X3
+0X20013, 0X7, 0X30021, 0X8
+0X7, 0X50036, 0X7, 0X1
+0X0, 0X8, 0X200F8, 0X9
+0X4003D, 0X4, 0XA, 0X2
+0X3003E, 0X3, 0XA, 0X100FD
+0X10038     
+
+)";
+
+  VerifyDisassembly(kHex, kDisassembly);
+}
+
+TEST_F(HexToText, BytesLittleEndian) {
+  constexpr char kHex[] = R"(
+0x03  0x02  0x23  0x07  0x00  0x06  0x01  0x00  0x00  0x00  0x07  0x00  0x0b  0x00  0x00  0x00
+0x00  0x00  0x00  0x00  0x11  0x00  0x02  0x00  0x01  0x00  0x00  0x00  0x0e  0x00  0x03  0x00
+0x00  0x00  0x00  0x00  0x01  0x00  0x00  0x00  0x0f  0x00  0x07  0x00  0x00  0x00  0x00  0x00
+0x01  0x00  0x00  0x00  0x6d  0x61  0x69  0x6e  0x00  0x00  0x00  0x00  0x02  0x00  0x00  0x00
+0x03  0x00  0x00  0x00  0x05  0x00  0x06  0x00  0x02  0x00  0x00  0x00  0x66  0x61  0x6e  0x63
+0x79  0x5f  0x61  0x74  0x74  0x72  0x69  0x62  0x75  0x74  0x65  0x00  0x05  0x00  0x06  0x00
+0x03  0x00  0x00  0x00  0x75  0x73  0x65  0x66  0x75  0x6c  0x5f  0x6f  0x75  0x74  0x70  0x75
+0x74  0x00  0x00  0x00  0x47  0x00  0x04  0x00  0x02  0x00  0x00  0x00  0x1e  0x00  0x00  0x00
+0x04  0x00  0x00  0x00  0x47  0x00  0x04  0x00  0x03  0x00  0x00  0x00  0x1e  0x00  0x00  0x00
+0x02  0x00  0x00  0x00  0x16  0x00  0x03  0x00  0x04  0x00  0x00  0x00  0x20  0x00  0x00  0x00
+0x20  0x00  0x04  0x00  0x05  0x00  0x00  0x00  0x01  0x00  0x00  0x00  0x04  0x00  0x00  0x00
+0x3b  0x00  0x04  0x00  0x05  0x00  0x00  0x00  0x02  0x00  0x00  0x00  0x01  0x00  0x00  0x00
+0x20  0x00  0x04  0x00  0x06  0x00  0x00  0x00  0x03  0x00  0x00  0x00  0x04  0x00  0x00  0x00
+0x3b  0x00  0x04  0x00  0x06  0x00  0x00  0x00  0x03  0x00  0x00  0x00  0x03  0x00  0x00  0x00
+0x13  0x00  0x02  0x00  0x07  0x00  0x00  0x00  0x21  0x00  0x03  0x00  0x08  0x00  0x00  0x00
+0x07  0x00  0x00  0x00  0x36  0x00  0x05  0x00  0x07  0x00  0x00  0x00  0x01  0x00  0x00  0x00
+0x00  0x00  0x00  0x00  0x08  0x00  0x00  0x00  0xf8  0x00  0x02  0x00  0x09  0x00  0x00  0x00
+0x3d  0x00  0x04  0x00  0x04  0x00  0x00  0x00  0x0a  0x00  0x00  0x00  0x02  0x00  0x00  0x00
+0x3e  0x00  0x03  0x00  0x03  0x00  0x00  0x00  0x0a  0x00  0x00  0x00  0xfd  0x00  0x01  0x00
+0x38  0x00  0x01  0x00
+)";
+
+  VerifyDisassembly(kHex, kDisassembly);
+}
+
+TEST_F(HexToText, BytesBigEndian) {
+  constexpr char kHex[] = R"(
+X07,X23,X02,X03, X00,X01,X06,X00, X00,X07,X00,X00, X00,X00,X00,X0B
+X00,X00,X00,X00, X00,X02,X00,X11, X00,X00,X00,X01, X00,X03,X00,X0E
+X00,X00,X00,X00, X00,X00,X00,X01, X00,X07,X00,X0F, X00,X00,X00,X00
+X00,X00,X00,X01, X6E,X69,X61,X6D, X00,X00,X00,X00, X00,X00,X00,X02
+X00,X00,X00,X03, X00,X06,X00,X05, X00,X00,X00,X02, X63,X6E,X61,X66
+X74,X61,X5F,X79, X62,X69,X72,X74, X00,X65,X74,X75, X00,X06,X00,X05
+X00,X00,X00,X03, X66,X65,X73,X75, X6F,X5F,X6C,X75, X75,X70,X74,X75
+X00,X00,X00,X74, X00,X04,X00,X47, X00,X00,X00,X02, X00,X00,X00,X1E
+X00,X00,X00,X04, X00,X04,X00,X47, X00,X00,X00,X03, X00,X00,X00,X1E
+X00,X00,X00,X02, X00,X03,X00,X16, X00,X00,X00,X04, X00,X00,X00,X20
+X00,X04,X00,X20, X00,X00,X00,X05, X00,X00,X00,X01, X00,X00,X00,X04
+X00,X04,X00,X3B, X00,X00,X00,X05, X00,X00,X00,X02, X00,X00,X00,X01
+X00,X04,X00,X20, X00,X00,X00,X06, X00,X00,X00,X03, X00,X00,X00,X04
+X00,X04,X00,X3B, X00,X00,X00,X06, X00,X00,X00,X03, X00,X00,X00,X03
+X00,X02,X00,X13, X00,X00,X00,X07, X00,X03,X00,X21, X00,X00,X00,X08
+X00,X00,X00,X07, X00,X05,X00,X36, X00,X00,X00,X07, X00,X00,X00,X01
+X00,X00,X00,X00, X00,X00,X00,X08, X00,X02,X00,XF8, X00,X00,X00,X09
+X00,X04,X00,X3D, X00,X00,X00,X04, X00,X00,X00,X0A, X00,X00,X00,X02
+X00,X03,X00,X3E, X00,X00,X00,X03, X00,X00,X00,X0A, X00,X01,X00,XFD
+X00,X01,X00,X38,
+)";
+
+  VerifyDisassembly(kHex, kDisassembly);
+}
+
+TEST_F(HexToText, StreamLittleEndian) {
+  constexpr char kHex[] = R"(
+03  02  23  07  00  06  01  00  00  00  07  00  0b  00  00  00
+00  00  00  00  11  00  02  00  01  00  00  00  0e  00  03  00
+00  00  00  00  01  00  00  00  0f  00  07  00  00  00  00  00
+01  00  00  00  6d  61  69  6e  00  00  00  00  02  00  00  00
+03  00  00  00  05  00  06  00  02  00  00  00  66  61  6e  63
+79  5f  61  74  74  72  69  62  75  74  65  00  05  00  06  00
+03  00  00  00  75  73  65  66  75  6c  5f  6f  75  74  70  75
+74  00  00  00  47  00  04  00  02  00  00  00  1e  00  00  00
+04  00  00  00  47  00  04  00  03  00  00  00  1e  00  00  00
+02  00  00  00  16  00  03  00  04  00  00  00  20  00  00  00
+20  00  04  00  05  00  00  00  01  00  00  00  04  00  00  00
+3b  00  04  00  05  00  00  00  02  00  00  00  01  00  00  00
+20  00  04  00  06  00  00  00  03  00  00  00  04  00  00  00
+3b  00  04  00  06  00  00  00  03  00  00  00  03  00  00  00
+13  00  02  00  07  00  00  00  21  00  03  00  08  00  00  00
+07  00  00  00  36  00  05  00  07  00  00  00  01  00  00  00
+00  00  00  00  08  00  00  00  f8  00  02  00  09  00  00  00
+3d  00  04  00  04  00  00  00  0a  00  00  00  02  00  00  00
+3e  00  03  00  03  00  00  00  0a  00  00  00  fd  00  01  00
+38  00  01  00
+)";
+
+  VerifyDisassembly(kHex, kDisassembly);
+}
+
+TEST_F(HexToText, StreamLittleEndianNoDelim) {
+  constexpr char kHex[] = R"(
+0302230700060100000007000B000000
+0000000011000200010000000E000300
+00000000010000000F00070000000000
+010000006D61696E0000000002000000
+03000000050006000200000066616E63
+795F6174747269627574650005000600
+0300000075736566756C5F6F75747075
+7400000047000400020000001E000000
+0400000047000400030000001E000000
+02000000160003000400000020000000
+20000400050000000100000004000000
+3B000400050000000200000001000000
+20000400060000000300000004000000
+3B000400060000000300000003000000
+13000200070000002100030008000000
+07000000360005000700000001000000
+0000000008000000F800020009000000
+3D000400040000000A00000002000000
+3E000300030000000A000000FD000100
+38000100
+)";
+
+  VerifyDisassembly(kHex, kDisassembly);
+}
+
+TEST_F(HexToText, StreamBigEndian) {
+  constexpr char kHex[] = R"(
+07230203, 00010600, 00070000, 0000000b
+00000000, 00020011, 00000001, 0003000e
+00000000, 00000001, 0007000f, 00000000
+00000001, 6e69616d, 00000000, 00000002
+00000003, 00060005, 00000002, 636e6166
+74615f79, 62697274, 00657475, 00060005
+00000003, 66657375, 6f5f6c75, 75707475
+00000074, 00040047, 00000002, 0000001e
+00000004, 00040047, 00000003, 0000001e
+00000002, 00030016, 00000004, 00000020
+00040020, 00000005, 00000001, 00000004
+0004003b, 00000005, 00000002, 00000001
+00040020, 00000006, 00000003, 00000004
+0004003b, 00000006, 00000003, 00000003
+00020013, 00000007, 00030021, 00000008
+00000007, 00050036, 00000007, 00000001
+00000000, 00000008, 000200f8, 00000009
+0004003d, 00000004, 0000000a, 00000002
+0003003e, 00000003, 0000000a, 000100fd
+00010038,
+)";
+
+  VerifyDisassembly(kHex, kDisassembly);
+}
+
+TEST_F(HexToText, WordsNoDelimieter) {
+  constexpr char kHex[] = R"(0x07230203 0x00010600 0x00070000 0x0000000b
+0x00000000 0x00020011 0x00000001 0x0003000e
+0x00000000 0x00000001 0x0007000f 0x00000000
+0x00000001 0x6e69616d 0x00000000 0x00000002
+0x00000003 0x00060005 0x00000002 0x636e6166
+0x74615f79 0x62697274 0x00657475 0x00060005
+0x00000003 0x666573750x6f5f6c75 0x75707475
+0x00000074 0x00040047 0x00000002 0x0000001e
+0x00000004 0x00040047 0x00000003 0x0000001e
+0x00000002 0x00030016 0x00000004 0x00000020
+0x00040020 0x00000005 0x00000001 0x00000004
+0x0004003b 0x00000005 0x00000002 0x00000001
+0x00040020 0x00000006 0x00000003 0x00000004
+0x0004003b 0x00000006 0x00000003 0x00000003
+0x00020013 0x00000007 0x00030021 0x00000008
+0x00000007 0x00050036 0x00000007 0x00000001
+0x00000000 0x00000008 0x000200f8 0x00000009
+0x0004003d 0x00000004 0x0000000a 0x00000002
+0x0003003e 0x00000003 0x0000000a 0x000100fd
+0x00010038)";
+
+  EnsureError(kHex);
+}
+
+TEST_F(HexToText, InvalidFirstToken) {
+  constexpr char kHex[] = R"(0x17230203, 0x00010600, 0x00070000, 0x0000000b
+0x00000000, 0x00020011, 0x00000001, 0x0003000e
+0x00000000, 0x00000001, 0x0007000f, 0x00000000
+0x00000001, 0x6e69616d, 0x00000000, 0x00000002
+0x00000003, 0x00060005, 0x00000002, 0x636e6166
+0x74615f79, 0x62697274, 0x00657475, 0x00060005
+0x00000003, 0x66657375, 0x6f5f6c75, 0x75707475
+0x00000074, 0x00040047, 0x00000002, 0x0000001e
+0x00000004, 0x00040047, 0x00000003, 0x0000001e
+0x00000002, 0x00030016, 0x00000004, 0x00000020
+0x00040020, 0x00000005, 0x00000001, 0x00000004
+0x0004003b, 0x00000005, 0x00000002, 0x00000001
+0x00040020, 0x00000006, 0x00000003, 0x00000004
+0x0004003b, 0x00000006, 0x00000003, 0x00000003
+0x00020013, 0x00000007, 0x00030021, 0x00000008
+0x00000007, 0x00050036, 0x00000007, 0x00000001
+0x00000000, 0x00000008, 0x000200f8, 0x00000009
+0x0004003d, 0x00000004, 0x0000000a, 0x00000002
+0x0003003e, 0x00000003, 0x0000000a, 0x000100fd
+0x00010038)";
+
+  EnsureError(kHex);
+}
+
+TEST_F(HexToText, NonHexCharacter) {
+  // Note: a 6 is replaced with G in this stream
+  constexpr char kHex[] = R"(0x07230203, 0x00010600, 0x00070000, 0x0000000b
+0x00000000, 0x00020011, 0x00000001, 0x0003000e
+0x00000000, 0x00000001, 0x0007000f, 0x00000000
+0x00000001, 0x6e69616d, 0x00000000, 0x00000002
+0x00000003, 0x00060005, 0x00000002, 0x636e6166
+0x74615f79, 0x62697274, 0x00657475, 0x00060005
+0x00000003, 0x66657375, 0x6f5f6c75, 0x75707475
+0x00000074, 0x00040047, 0x00000002, 0x0000001e
+0x00000004, 0x00040047, 0x00000003, 0x0000001e
+0x00000002, 0x0003001G, 0x00000004, 0x00000020
+0x00040020, 0x00000005, 0x00000001, 0x00000004
+0x0004003b, 0x00000005, 0x00000002, 0x00000001
+0x00040020, 0x00000006, 0x00000003, 0x00000004
+0x0004003b, 0x00000006, 0x00000003, 0x00000003
+0x00020013, 0x00000007, 0x00030021, 0x00000008
+0x00000007, 0x00050036, 0x00000007, 0x00000001
+0x00000000, 0x00000008, 0x000200f8, 0x00000009
+0x0004003d, 0x00000004, 0x0000000a, 0x00000002
+0x0003003e, 0x00000003, 0x0000000a, 0x000100fd
+0x00010038)";
+
+  EnsureError(kHex);
+}
+
+TEST_F(HexToText, MissingExpectedPrefix) {
+  constexpr char kHex[] = R"(0x07230203, 0x00010600, 0x00070000, 0x0000000b
+0x00000000, 0x00020011, 0x00000001, 0x0003000e
+0x00000000, 0x00000001, 0x0007000f, 0x00000000
+0x00000001, 0x6e69616d, 0x00000000, 0x00000002
+0x00000003, 0x00060005, 0x00000002, 0x636e6166
+0x74615f79, 0x62697274, 0x00657475, 0x00060005
+0x00000003, 0x66657375, 0x6f5f6c75, 0x75707475
+0x00000074, 0x00040047, 0x00000002, 0x0000001e
+0x00000004, 0x00040047, 0x00000003, 0x0000001e
+0x00000002, 0x00030016, 0x00000004, 0x00000020
+0x00040020, 0x00000005,   00000001, 0x00000004
+0x0004003b, 0x00000005, 0x00000002, 0x00000001
+0x00040020, 0x00000006, 0x00000003, 0x00000004
+0x0004003b, 0x00000006, 0x00000003, 0x00000003
+0x00020013, 0x00000007, 0x00030021, 0x00000008
+0x00000007, 0x00050036, 0x00000007, 0x00000001
+0x00000000, 0x00000008, 0x000200f8, 0x00000009
+0x0004003d, 0x00000004, 0x0000000a, 0x00000002
+0x0003003e, 0x00000003, 0x0000000a, 0x000100fd
+0x00010038)";
+
+  EnsureError(kHex);
+}
+
+TEST_F(HexToText, UnexpectedPrefix) {
+  constexpr char kHex[] = R"(07230203, 00010600, 00070000, 0000000b
+00000000, 00020011, 00000001, 0003000e
+00000000, 00000001, 0007000f, 00000000
+00000001, 6e69616d, 00000000, 00000002
+00000003, 00060005, 00000002, 636e6166
+74615f79, 62697274, 00657475, 00060005
+00000003, 66657375, 6f5f6c75, 75707475
+00000074, 00040047, 00000002, 0000001e
+00000004, 00040047, 00000003, 0000001e
+00000002, 00030016, 00000004, 00000020
+00040020, 00000005, 0x00000001, 00000004
+0004003b, 00000005, 00000002, 00000001
+00040020, 00000006, 00000003, 00000004
+0004003b, 00000006, 00000003, 00000003
+00020013, 00000007, 00030021, 00000008
+00000007, 00050036, 00000007, 00000001
+00000000, 00000008, 000200f8, 00000009
+0004003d, 00000004, 0000000a, 00000002
+0003003e, 00000003, 0000000a, 000100fd
+00010038)";
+
+  EnsureError(kHex);
+}
+}  // namespace
+}  // namespace spvtools
--- a/tools/dis/dis.cpp
+++ b/tools/dis/dis.cpp
@ -33,6 +33,10 @@ Usage: %s [options] [<filename>]
 The SPIR-V binary is read from <filename>. If no file is specified,
 or if the filename is "-", then the binary is read from standard input.

+A text-based hex stream is also accepted as binary input, which should either
+consist of 32-bit words or 8-bit bytes.  The 0x or x prefix is optional, but
+should be consistently present in the stream.
+
 Options:

  -h, --help        Print this help.
--- a/tools/io.cpp
+++ b/tools/io.cpp
@ -15,6 +15,8 @@
 #include "io.h"

 #include <assert.h>
+#include <ctype.h>
+#include <stdlib.h>

 #if defined(SPIRV_WINDOWS)
 #include <fcntl.h>
@ -40,7 +42,7 @@ template <typename T>
 void ReadFile(FILE* file, std::vector<T>* data) {
  if (file == nullptr) return;

-  const int buf_size = 1024;
+  const int buf_size = 4096 / sizeof(T);
  T buf[buf_size];
  while (size_t len = fread(buf, sizeof(T), buf_size, file)) {
    data->insert(data->end(), buf, buf + len);
@ -48,9 +50,7 @@ void ReadFile(FILE* file, std::vector<T>* data) {
 }

 // Returns true if |file| has encountered an error opening the file or reading
-// the file as a series of element of type |T|. If there was an error, writes an
-// error message to standard error.
-template <class T>
+// from it. If there was an error, writes an error message to standard error.
 bool WasFileCorrectlyRead(FILE* file, const char* filename) {
  if (file == nullptr) {
    fprintf(stderr, "error: file does not exist '%s'\n", filename);
@ -62,17 +62,291 @@ bool WasFileCorrectlyRead(FILE* file, const char* filename) {
      fprintf(stderr, "error: error reading file '%s'\n", filename);
      return false;
    }
-  } else {
-    if (sizeof(T) != 1 && (ftell(file) % sizeof(T))) {
-      fprintf(
-          stderr,
-          "error: file size should be a multiple of %zd; file '%s' corrupt\n",
-          sizeof(T), filename);
-      return false;
-    }
  }
  return true;
 }
+
+// Ensure the file contained an exact number of elements, whose size is given in
+// |alignment|.  |read_size| is the size that was read from |filename|.  If the
+// size is not a multiple of |alignment|, writes an error message to standard
+// error and returns false.
+bool WasFileSizeAligned(const char* filename, size_t read_size,
+                        size_t alignment) {
+  assert(alignment != 1);
+  if ((read_size % alignment) != 0) {
+    // |alignment| is a size_t, which is printed with %zu (%zd is meant for the
+    // corresponding signed type).
+    fprintf(stderr,
+            "error: file size should be a multiple of %zu; file '%s' corrupt\n",
+            alignment, filename);
+    return false;
+  }
+  return true;
+}
+
+// Different formats the hex is expected to be in.  The format is selected from
+// the way the first token of the stream (the SPIR-V magic number) is written.
+enum class HexMode {
+  // One prefixed token per 32-bit word: 0x07230203, ...
+  Words,
+  // One prefixed token per byte, most significant byte of each word first:
+  // 0x07, 0x23, 0x02, 0x03, ...
+  BytesBigEndian,
+  // One prefixed token per byte, least significant byte of each word first:
+  // 0x03, 0x02, 0x23, 0x07, ...
+  BytesLittleEndian,
+  // Two unprefixed hex digits per byte, most significant byte first:
+  // 07 23 02 03 ...
+  StreamBigEndian,
+  // Two unprefixed hex digits per byte, least significant byte first:
+  // 03 02 23 07 ...
+  StreamLittleEndian,
+};
+
+// Whether a character should be skipped as whitespace / separator /
+// end-of-file.
+//
+// The character is converted to unsigned char before it is classified:
+// passing a negative value other than EOF to isspace() is undefined behavior,
+// and the input may contain arbitrary bytes.
+bool IsSpace(char c) {
+  return isspace(static_cast<unsigned char>(c)) || c == ',' || c == '\0';
+}
+
+// Returns true if |stream| looks like a text hex stream, judged by its first
+// character that is not skipped by |IsSpace|.  Returns false when there is no
+// such character.
+bool IsHexStream(const std::vector<char>& stream) {
+  // Find the first character that is not whitespace / separator.
+  size_t pos = 0;
+  while (pos < stream.size() && IsSpace(stream[pos])) {
+    ++pos;
+  }
+
+  if (pos == stream.size()) {
+    return false;
+  }
+
+  // Every possible case of a SPIR-V hex stream starts with either '0' or 'x'
+  // (see |HexMode| values), so that one character is enough to decide.
+  const char first = stream[pos];
+  return first == '0' || first == 'x' || first == 'X';
+}
+
+// Returns true if the first |len| characters of |token| and |expect| are equal
+// when compared case-insensitively.  Both buffers must hold at least |len|
+// characters.
+bool MatchIgnoreCase(const char* token, const char* expect, size_t len) {
+  for (size_t i = 0; i < len; ++i) {
+    // tolower() has undefined behavior for negative values other than EOF, so
+    // go through unsigned char; |token| may hold arbitrary input bytes.
+    const int token_char = tolower(static_cast<unsigned char>(token[i]));
+    const int expect_char = tolower(static_cast<unsigned char>(expect[i]));
+    if (token_char != expect_char) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Helper class to tokenize a hex stream.
+//
+// The constructor inspects the first token of |stream| to determine the
+// |HexMode| of the stream.  |Parse| then appends the parsed 32-bit words to
+// |data|.  Errors are reported on standard error, mentioning |filename|.
+class HexTokenizer {
+ public:
+  HexTokenizer(const char* filename, const std::vector<char>& stream,
+               std::vector<uint32_t>* data)
+      : filename_(filename), stream_(stream), data_(data) {
+    DetermineMode();
+  }
+
+  // Parses the whole stream, appending one element to |data_| per word.
+  // Returns false if the format of the stream was not recognized or a parse
+  // error was encountered.
+  bool Parse() {
+    while (current_ < stream_.size() && !encountered_error_) {
+      data_->push_back(GetNextWord());
+
+      // Make sure trailing space does not lead to parse error by skipping it
+      // and exiting the loop.
+      SkipSpace();
+    }
+
+    return !encountered_error_;
+  }
+
+ private:
+  // Report a parse error.  Only the first error is printed.
+  void ParseError(const char* reason) {
+    if (!encountered_error_) {
+      fprintf(stderr,
+              "error: hex stream parse error at character %zu: %s in '%s'\n",
+              current_, reason, filename_);
+      encountered_error_ = true;
+    }
+  }
+
+  // Skip whitespace until the next non-whitespace non-comma character.
+  void SkipSpace() {
+    while (current_ < stream_.size()) {
+      char c = stream_[current_];
+      if (!IsSpace(c)) {
+        return;
+      }
+
+      ++current_;
+    }
+  }
+
+  // Skip the 0x or x at the beginning of a hex value.
+  void Skip0x() {
+    // The first character must be 0 or x.
+    const char first = Next();
+    if (first != '0' && first != 'x' && first != 'X') {
+      ParseError("expected 0x or x");
+    } else if (first == '0') {
+      const char second = Next();
+      if (second != 'x' && second != 'X') {
+        ParseError("expected 0x");
+      }
+    }
+  }
+
+  // Consume the next character.  Returns '\0' once the end of the stream is
+  // reached.
+  char Next() { return current_ < stream_.size() ? stream_[current_++] : '\0'; }
+
+  // Determine how to read the hex stream based on the first token.
+  void DetermineMode() {
+    SkipSpace();
+
+    // Read 11 bytes, that is the size of the biggest token (10) + one more.
+    // |Next| returns '\0' past the end of the stream, so a short stream leaves
+    // the tail of the buffer NUL-filled.
+    char first_token[11];
+    for (uint32_t i = 0; i < 11; ++i) {
+      first_token[i] = Next();
+    }
+
+    // Table of how to match the first token with a mode.
+    struct {
+      const char* expect;
+      bool must_have_delimiter;
+      HexMode mode;
+    } parse_info[] = {
+        {"0x07230203", true, HexMode::Words},
+        {"0x7230203", true, HexMode::Words},
+        {"x07230203", true, HexMode::Words},
+        {"x7230203", true, HexMode::Words},
+
+        {"0x07", true, HexMode::BytesBigEndian},
+        {"0x7", true, HexMode::BytesBigEndian},
+        {"x07", true, HexMode::BytesBigEndian},
+        {"x7", true, HexMode::BytesBigEndian},
+
+        {"0x03", true, HexMode::BytesLittleEndian},
+        {"0x3", true, HexMode::BytesLittleEndian},
+        {"x03", true, HexMode::BytesLittleEndian},
+        {"x3", true, HexMode::BytesLittleEndian},
+
+        {"07", false, HexMode::StreamBigEndian},
+        {"03", false, HexMode::StreamLittleEndian},
+    };
+
+    // Check to see if any of the possible first tokens are matched.  If not,
+    // this is not a recognized hex stream.
+    encountered_error_ = true;
+    for (const auto& info : parse_info) {
+      const size_t expect_len = strlen(info.expect);
+      const bool matches_expect =
+          MatchIgnoreCase(first_token, info.expect, expect_len);
+      const bool satisfies_delimiter =
+          !info.must_have_delimiter || IsSpace(first_token[expect_len]);
+      if (matches_expect && satisfies_delimiter) {
+        mode_ = info.mode;
+        encountered_error_ = false;
+        break;
+      }
+    }
+
+    if (encountered_error_) {
+      fprintf(stderr,
+              "error: hex format detected, but pattern '%.11s' is not "
+              "recognized '%s'\n",
+              first_token, filename_);
+    }
+
+    // Reset the position to restart parsing with the determined mode.
+    current_ = 0;
+  }
+
+  // Consume up to |max_len| characters and put them in |token_chars|.  A
+  // delimiter is expected. The resulting string is NUL-terminated.
+  void NextN(char token_chars[9], size_t max_len) {
+    assert(max_len < 9);
+
+    for (size_t i = 0; i <= max_len; ++i) {
+      char c = Next();
+      if (IsSpace(c)) {
+        token_chars[i] = '\0';
+        return;
+      }
+
+      token_chars[i] = c;
+      // isxdigit() has undefined behavior for negative values other than EOF,
+      // so classify the character as unsigned char.
+      if (!isxdigit(static_cast<unsigned char>(c))) {
+        ParseError("encountered non-hex character");
+      }
+    }
+
+    // If space is not reached before the maximum number of characters were
+    // consumed, that's an error.
+    ParseError("expected delimiter (space or comma)");
+    token_chars[max_len] = '\0';
+  }
+
+  // Consume one hex digit.
+  char NextHexDigit() {
+    char c = Next();
+    // See |NextN| for why the character is converted to unsigned char.
+    if (!isxdigit(static_cast<unsigned char>(c))) {
+      ParseError("encountered non-hex character");
+    }
+    return c;
+  }
+
+  // Extract a token out of the stream.  It could be either a word or a byte,
+  // based on |mode_|.  Returns 0 if a parse error has been encountered.
+  uint32_t GetNextToken() {
+    SkipSpace();
+
+    // The longest token can be 8 chars (for |HexMode::Words|), add one for
+    // '\0'.
+    char token_chars[9];
+
+    switch (mode_) {
+      case HexMode::Words:
+      case HexMode::BytesBigEndian:
+      case HexMode::BytesLittleEndian:
+        // Start with 0x, followed by up to 8 (for Word) or 2 (for Byte*)
+        // digits.
+        Skip0x();
+        NextN(token_chars, mode_ == HexMode::Words ? 8 : 2);
+        break;
+      case HexMode::StreamBigEndian:
+      case HexMode::StreamLittleEndian:
+        // Always expected to see two consecutive hex digits.
+        token_chars[0] = NextHexDigit();
+        token_chars[1] = NextHexDigit();
+        token_chars[2] = '\0';
+        break;
+    }
+
+    if (encountered_error_) {
+      return 0;
+    }
+
+    // Parse the hex value that was just read.  The unsigned conversion is
+    // required: where long is 32 bits wide, strtol() clamps every word above
+    // 0x7fffffff to LONG_MAX.
+    return static_cast<uint32_t>(strtoul(token_chars, nullptr, 16));
+  }
+
+  // Construct a word out of tokens
+  uint32_t GetNextWord() {
+    if (mode_ == HexMode::Words) {
+      return GetNextToken();
+    }
+
+    // In all other modes a word is made of four byte tokens.  The elements of
+    // a braced initializer list are evaluated in order, so the tokens are read
+    // in stream order.
+    uint32_t tokens[4] = {
+        GetNextToken(),
+        GetNextToken(),
+        GetNextToken(),
+        GetNextToken(),
+    };
+
+    switch (mode_) {
+      case HexMode::BytesBigEndian:
+      case HexMode::StreamBigEndian:
+        return tokens[0] << 24 | tokens[1] << 16 | tokens[2] << 8 | tokens[3];
+      case HexMode::BytesLittleEndian:
+      case HexMode::StreamLittleEndian:
+        return tokens[3] << 24 | tokens[2] << 16 | tokens[1] << 8 | tokens[0];
+      default:
+        assert(false);
+        return 0;
+    }
+  }
+
+  // Name of the input, only used in error messages.
+  const char* filename_;
+  // The characters of the hex stream being parsed.
+  const std::vector<char>& stream_;
+  // Where the parsed words are appended.
+  std::vector<uint32_t>* data_;
+
+  // Format of the stream, as determined by |DetermineMode|.
+  HexMode mode_ = HexMode::Words;
+  // Index in |stream_| of the next character to consume.
+  size_t current_ = 0;
+  // Set once the stream is found to be malformed; stops parsing.
+  bool encountered_error_ = false;
+};
 }  // namespace

 bool ReadBinaryFile(const char* filename, std::vector<uint32_t>* data) {
@ -87,12 +361,39 @@ bool ReadBinaryFile(const char* filename, std::vector<uint32_t>* data) {
    fp = stdin;
  }

-  ReadFile(fp, data);
-  bool succeeded = WasFileCorrectlyRead<uint32_t>(fp, filename);
+  // Read into a char vector first.  If this is a hex stream, it needs to be
+  // processed as such.
+  std::vector<char> data_raw;
+  ReadFile(fp, &data_raw);
+  bool succeeded = WasFileCorrectlyRead(fp, filename);
  if (use_file && fp) fclose(fp);
+
+  if (!succeeded) {
+    return false;
+  }
+
+  if (IsHexStream(data_raw)) {
+    // If a hex stream, parse it and fill |data|.
+    HexTokenizer tokenizer(filename, data_raw, data);
+    succeeded = tokenizer.Parse();
+  } else {
+    // If not a hex stream, convert it to uint32_t via memcpy.
+    succeeded = WasFileSizeAligned(filename, data_raw.size(), sizeof(uint32_t));
+    if (succeeded) {
+      data->resize(data_raw.size() / sizeof(uint32_t), 0);
+      memcpy(data->data(), data_raw.data(), data_raw.size());
+    }
+  }
+
  return succeeded;
 }

+// Parses |stream| as a hex stream and appends the resulting words to |data|.
+// Errors are reported against the placeholder name "<input string>", as there
+// is no file involved.  Returns false if parsing fails.
+bool ConvertHexToBinary(const std::vector<char>& stream,
+                        std::vector<uint32_t>* data) {
+  return HexTokenizer("<input string>", stream, data).Parse();
+}
+
 bool ReadTextFile(const char* filename, std::vector<char>* data) {
  assert(data->empty());

@ -106,7 +407,7 @@ bool ReadTextFile(const char* filename, std::vector<char>* data) {
  }

  ReadFile(fp, data);
-  bool succeeded = WasFileCorrectlyRead<char>(fp, filename);
+  bool succeeded = WasFileCorrectlyRead(fp, filename);
  if (use_file && fp) fclose(fp);
  return succeeded;
 }
--- a/tools/io.h
+++ b/tools/io.h
@ -25,8 +25,28 @@
 // file. If |filename| is nullptr or "-", reads from the standard input, but
 // reopened as a binary file. If any error occurs, writes error messages to
 // standard error and returns false.
+//
+// If the given input is detected to be in ASCII hex, it is converted to binary
+// automatically.  In that case, the shape of the input data is determined based
+// on the representation of the magic number:
+//
+//  * "[0]x[0]7230203": Every following "0x..." represents a word.
+//  * "[0]x[0]7[,] [0]x23...": Every following "0x..." represents a byte, stored
+//    in big-endian order.
+//  * "[0]x[0]3[,] [0]x[0]2...": Every following "0x..." represents a byte,
+//    stored in little-endian order.
+//  * "07[, ]23...": Every following "XY" represents a byte, stored in
+//    big-endian order.
+//  * "03[, ]02...": Every following "XY" represents a byte, stored in
+//    little-endian order.
 bool ReadBinaryFile(const char* filename, std::vector<uint32_t>* data);

+// The hex->binary logic of |ReadBinaryFile| applied to a pre-loaded stream of
+// bytes.  Used by tests to avoid having to call |ReadBinaryFile| with temp
+// files.  Returns false in case of parse errors.
+bool ConvertHexToBinary(const std::vector<char>& stream,
+                        std::vector<uint32_t>* data);
+
 // Sets the contents of the file named |filename| in |data|, assuming each
 // element in the file is of type |char|. The file is opened as a text file.  If
 // |filename| is nullptr or "-", reads from the standard input, but reopened as