mirror of
https://github.com/KhronosGroup/SPIRV-Tools
synced 2024-12-27 02:10:15 +00:00
57a2441791
CPPreference.com has this description of digits10: “The value of std::numeric_limits<T>::digits10 is the number of base-10 digits that can be represented by the type T without change, that is, any number with this many significant decimal digits can be converted to a value of type T and back to decimal form, without change due to rounding or overflow.” This means that any number with this many digits can be represented accurately in the corresponding type. A change in any digit in a number after that may or may not cause it a different bitwise representation. Therefore this isn’t necessarily enough precision to accurately represent the value in text. Instead we need max_digits10 which has the following description: “The value of std::numeric_limits<T>::max_digits10 is the number of base-10 digits that are necessary to uniquely represent all distinct values of the type T, such as necessary for serialization/deserialization to text.” The patch includes a test case in hex_float_test which tries to do a round-robin conversion of a number that requires more than 6 decimal places to be accurately represented. This would fail without the patch. Sadly this also breaks a bunch of other tests. Some of the tests in hex_float_test use ldexp and then compare it with a value which is not the same as the one returned by ldexp but instead is the value rounded to 6 decimals. Others use values that are not evenly representable as a binary floating fraction but then happened to generate the same value when rounded to 6 decimals. Where the actual value didn’t seem to matter these have been changed with different values that can be represented as a binary fraction.
1330 lines
52 KiB
C++
1330 lines
52 KiB
C++
// Copyright (c) 2015-2016 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include <cfloat>
|
|
#include <cmath>
|
|
#include <cstdio>
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <tuple>
|
|
|
|
#include <gmock/gmock.h>
|
|
|
|
#include "source/util/hex_float.h"
|
|
#include "unit_spirv.h"
|
|
|
|
namespace {
|
|
using spvutils::BitwiseCast;
|
|
using spvutils::Float16;
|
|
using spvutils::FloatProxy;
|
|
using spvutils::HexFloat;
|
|
using spvutils::ParseNormalFloat;
|
|
using ::testing::Eq;
|
|
|
|
// In this file "encode" means converting a number into a string,
|
|
// and "decode" means converting a string into a number.
|
|
|
|
using HexFloatTest =
|
|
::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
|
|
using DecodeHexFloatTest =
|
|
::testing::TestWithParam<std::pair<std::string, FloatProxy<float>>>;
|
|
using HexDoubleTest =
|
|
::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
|
|
using DecodeHexDoubleTest =
|
|
::testing::TestWithParam<std::pair<std::string, FloatProxy<double>>>;
|
|
using RoundTripFloatTest = ::testing::TestWithParam<float>;
|
|
using RoundTripDoubleTest = ::testing::TestWithParam<double>;
|
|
|
|
// Hex-encodes a float value.
|
|
template <typename T>
|
|
std::string EncodeViaHexFloat(const T& value) {
|
|
std::stringstream ss;
|
|
ss << spvutils::HexFloat<T>(value);
|
|
return ss.str();
|
|
}
|
|
|
|
// The following two tests can't be DRY because they take different parameter
|
|
// types.
|
|
|
|
TEST_P(HexFloatTest, EncodeCorrectly) {
|
|
EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second));
|
|
}
|
|
|
|
TEST_P(HexDoubleTest, EncodeCorrectly) {
|
|
EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second));
|
|
}
|
|
|
|
// Decodes a hex-float string.
|
|
template <typename T>
|
|
FloatProxy<T> Decode(const std::string& str) {
|
|
spvutils::HexFloat<FloatProxy<T>> decoded(0.f);
|
|
EXPECT_TRUE((std::stringstream(str) >> decoded).eof());
|
|
return decoded.value();
|
|
}
|
|
|
|
TEST_P(HexFloatTest, DecodeCorrectly) {
|
|
EXPECT_THAT(Decode<float>(GetParam().second), Eq(GetParam().first));
|
|
}
|
|
|
|
TEST_P(HexDoubleTest, DecodeCorrectly) {
|
|
EXPECT_THAT(Decode<double>(GetParam().second), Eq(GetParam().first));
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
Float32Tests, HexFloatTest,
|
|
::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
|
|
{0.f, "0x0p+0"},
|
|
{1.f, "0x1p+0"},
|
|
{2.f, "0x1p+1"},
|
|
{3.f, "0x1.8p+1"},
|
|
{0.5f, "0x1p-1"},
|
|
{0.25f, "0x1p-2"},
|
|
{0.75f, "0x1.8p-1"},
|
|
{-0.f, "-0x0p+0"},
|
|
{-1.f, "-0x1p+0"},
|
|
{-0.5f, "-0x1p-1"},
|
|
{-0.25f, "-0x1p-2"},
|
|
{-0.75f, "-0x1.8p-1"},
|
|
|
|
// Larger numbers
|
|
{512.f, "0x1p+9"},
|
|
{-512.f, "-0x1p+9"},
|
|
{1024.f, "0x1p+10"},
|
|
{-1024.f, "-0x1p+10"},
|
|
{1024.f + 8.f, "0x1.02p+10"},
|
|
{-1024.f - 8.f, "-0x1.02p+10"},
|
|
|
|
// Small numbers
|
|
{1.0f / 512.f, "0x1p-9"},
|
|
{1.0f / -512.f, "-0x1p-9"},
|
|
{1.0f / 1024.f, "0x1p-10"},
|
|
{1.0f / -1024.f, "-0x1p-10"},
|
|
{1.0f / 1024.f + 1.0f / 8.f, "0x1.02p-3"},
|
|
{1.0f / -1024.f - 1.0f / 8.f, "-0x1.02p-3"},
|
|
|
|
// lowest non-denorm
|
|
{float(ldexp(1.0f, -126)), "0x1p-126"},
|
|
{float(ldexp(-1.0f, -126)), "-0x1p-126"},
|
|
|
|
// Denormalized values
|
|
{float(ldexp(1.0f, -127)), "0x1p-127"},
|
|
{float(ldexp(1.0f, -127) / 2.0f), "0x1p-128"},
|
|
{float(ldexp(1.0f, -127) / 4.0f), "0x1p-129"},
|
|
{float(ldexp(1.0f, -127) / 8.0f), "0x1p-130"},
|
|
{float(ldexp(-1.0f, -127)), "-0x1p-127"},
|
|
{float(ldexp(-1.0f, -127) / 2.0f), "-0x1p-128"},
|
|
{float(ldexp(-1.0f, -127) / 4.0f), "-0x1p-129"},
|
|
{float(ldexp(-1.0f, -127) / 8.0f), "-0x1p-130"},
|
|
|
|
{float(ldexp(1.0, -127) + (ldexp(1.0, -127) / 2.0f)), "0x1.8p-127"},
|
|
{float(ldexp(1.0, -127) / 2.0 + (ldexp(1.0, -127) / 4.0f)),
|
|
"0x1.8p-128"},
|
|
|
|
})), );
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
Float32NanTests, HexFloatTest,
|
|
::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
|
|
// Various NAN and INF cases
|
|
{uint32_t(0xFF800000), "-0x1p+128"}, // -inf
|
|
{uint32_t(0x7F800000), "0x1p+128"}, // inf
|
|
{uint32_t(0xFFC00000), "-0x1.8p+128"}, // -nan
|
|
{uint32_t(0xFF800100), "-0x1.0002p+128"}, // -nan
|
|
{uint32_t(0xFF800c00), "-0x1.0018p+128"}, // -nan
|
|
{uint32_t(0xFF80F000), "-0x1.01ep+128"}, // -nan
|
|
{uint32_t(0xFFFFFFFF), "-0x1.fffffep+128"}, // -nan
|
|
{uint32_t(0x7FC00000), "0x1.8p+128"}, // +nan
|
|
{uint32_t(0x7F800100), "0x1.0002p+128"}, // +nan
|
|
{uint32_t(0x7f800c00), "0x1.0018p+128"}, // +nan
|
|
{uint32_t(0x7F80F000), "0x1.01ep+128"}, // +nan
|
|
{uint32_t(0x7FFFFFFF), "0x1.fffffep+128"}, // +nan
|
|
})), );
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
Float64Tests, HexDoubleTest,
|
|
::testing::ValuesIn(
|
|
std::vector<std::pair<FloatProxy<double>, std::string>>({
|
|
{0., "0x0p+0"},
|
|
{1., "0x1p+0"},
|
|
{2., "0x1p+1"},
|
|
{3., "0x1.8p+1"},
|
|
{0.5, "0x1p-1"},
|
|
{0.25, "0x1p-2"},
|
|
{0.75, "0x1.8p-1"},
|
|
{-0., "-0x0p+0"},
|
|
{-1., "-0x1p+0"},
|
|
{-0.5, "-0x1p-1"},
|
|
{-0.25, "-0x1p-2"},
|
|
{-0.75, "-0x1.8p-1"},
|
|
|
|
// Larger numbers
|
|
{512., "0x1p+9"},
|
|
{-512., "-0x1p+9"},
|
|
{1024., "0x1p+10"},
|
|
{-1024., "-0x1p+10"},
|
|
{1024. + 8., "0x1.02p+10"},
|
|
{-1024. - 8., "-0x1.02p+10"},
|
|
|
|
// Large outside the range of normal floats
|
|
{ldexp(1.0, 128), "0x1p+128"},
|
|
{ldexp(1.0, 129), "0x1p+129"},
|
|
{ldexp(-1.0, 128), "-0x1p+128"},
|
|
{ldexp(-1.0, 129), "-0x1p+129"},
|
|
{ldexp(1.0, 128) + ldexp(1.0, 90), "0x1.0000000004p+128"},
|
|
{ldexp(1.0, 129) + ldexp(1.0, 120), "0x1.008p+129"},
|
|
{ldexp(-1.0, 128) + ldexp(1.0, 90), "-0x1.fffffffff8p+127"},
|
|
{ldexp(-1.0, 129) + ldexp(1.0, 120), "-0x1.ffp+128"},
|
|
|
|
// Small numbers
|
|
{1.0 / 512., "0x1p-9"},
|
|
{1.0 / -512., "-0x1p-9"},
|
|
{1.0 / 1024., "0x1p-10"},
|
|
{1.0 / -1024., "-0x1p-10"},
|
|
{1.0 / 1024. + 1.0 / 8., "0x1.02p-3"},
|
|
{1.0 / -1024. - 1.0 / 8., "-0x1.02p-3"},
|
|
|
|
// Small outside the range of normal floats
|
|
{ldexp(1.0, -128), "0x1p-128"},
|
|
{ldexp(1.0, -129), "0x1p-129"},
|
|
{ldexp(-1.0, -128), "-0x1p-128"},
|
|
{ldexp(-1.0, -129), "-0x1p-129"},
|
|
{ldexp(1.0, -128) + ldexp(1.0, -90), "0x1.0000000004p-90"},
|
|
{ldexp(1.0, -129) + ldexp(1.0, -120), "0x1.008p-120"},
|
|
{ldexp(-1.0, -128) + ldexp(1.0, -90), "0x1.fffffffff8p-91"},
|
|
{ldexp(-1.0, -129) + ldexp(1.0, -120), "0x1.ffp-121"},
|
|
|
|
// lowest non-denorm
|
|
{ldexp(1.0, -1022), "0x1p-1022"},
|
|
{ldexp(-1.0, -1022), "-0x1p-1022"},
|
|
|
|
// Denormalized values
|
|
{ldexp(1.0, -1023), "0x1p-1023"},
|
|
{ldexp(1.0, -1023) / 2.0, "0x1p-1024"},
|
|
{ldexp(1.0, -1023) / 4.0, "0x1p-1025"},
|
|
{ldexp(1.0, -1023) / 8.0, "0x1p-1026"},
|
|
{ldexp(-1.0, -1024), "-0x1p-1024"},
|
|
{ldexp(-1.0, -1024) / 2.0, "-0x1p-1025"},
|
|
{ldexp(-1.0, -1024) / 4.0, "-0x1p-1026"},
|
|
{ldexp(-1.0, -1024) / 8.0, "-0x1p-1027"},
|
|
|
|
{ldexp(1.0, -1023) + (ldexp(1.0, -1023) / 2.0), "0x1.8p-1023"},
|
|
{ldexp(1.0, -1023) / 2.0 + (ldexp(1.0, -1023) / 4.0),
|
|
"0x1.8p-1024"},
|
|
|
|
})), );
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
Float64NanTests, HexDoubleTest,
|
|
::testing::ValuesIn(std::vector<
|
|
std::pair<FloatProxy<double>, std::string>>({
|
|
// Various NAN and INF cases
|
|
{uint64_t(0xFFF0000000000000LL), "-0x1p+1024"}, //-inf
|
|
{uint64_t(0x7FF0000000000000LL), "0x1p+1024"}, //+inf
|
|
{uint64_t(0xFFF8000000000000LL), "-0x1.8p+1024"}, // -nan
|
|
{uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"}, // -nan
|
|
{uint64_t(0xFFF0000000000001LL), "-0x1.0000000000001p+1024"}, // -nan
|
|
{uint64_t(0xFFF0000300000000LL), "-0x1.00003p+1024"}, // -nan
|
|
{uint64_t(0xFFFFFFFFFFFFFFFFLL), "-0x1.fffffffffffffp+1024"}, // -nan
|
|
{uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"}, // +nan
|
|
{uint64_t(0x7FF0F00000000000LL), "0x1.0fp+1024"}, // +nan
|
|
{uint64_t(0x7FF0000000000001LL), "0x1.0000000000001p+1024"}, // -nan
|
|
{uint64_t(0x7FF0000300000000LL), "0x1.00003p+1024"}, // -nan
|
|
{uint64_t(0x7FFFFFFFFFFFFFFFLL), "0x1.fffffffffffffp+1024"}, // -nan
|
|
})), );
|
|
|
|
// Tests that encoding a value and decoding it again restores
|
|
// the same value.
|
|
TEST_P(RoundTripFloatTest, CanStoreAccurately) {
|
|
std::stringstream ss;
|
|
ss << FloatProxy<float>(GetParam());
|
|
ss.seekg(0);
|
|
FloatProxy<float> res;
|
|
ss >> res;
|
|
EXPECT_THAT(GetParam(), Eq(res.getAsFloat()));
|
|
}
|
|
|
|
TEST_P(RoundTripDoubleTest, CanStoreAccurately) {
|
|
std::stringstream ss;
|
|
ss << FloatProxy<double>(GetParam());
|
|
ss.seekg(0);
|
|
FloatProxy<double> res;
|
|
ss >> res;
|
|
EXPECT_THAT(GetParam(), Eq(res.getAsFloat()));
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
Float32StoreTests, RoundTripFloatTest,
|
|
::testing::ValuesIn(std::vector<float>(
|
|
{// Value requiring more than 6 digits of precision to be
|
|
// represented accurately.
|
|
3.0000002f})));
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
Float64StoreTests, RoundTripDoubleTest,
|
|
::testing::ValuesIn(std::vector<double>(
|
|
{// Value requiring more than 15 digits of precision to be
|
|
// represented accurately.
|
|
1.5000000000000002})));
|
|
|
|
TEST(HexFloatStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
|
|
std::stringstream s;
|
|
s << std::setw(4) << std::oct << std::setfill('x') << 8 << " "
|
|
<< FloatProxy<float>(uint32_t(0xFF800100)) << " " << std::setw(4) << 9;
|
|
EXPECT_THAT(s.str(), Eq(std::string("xx10 -0x1.0002p+128 xx11")));
|
|
}
|
|
|
|
TEST(HexDoubleStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
|
|
std::stringstream s;
|
|
s << std::setw(4) << std::oct << std::setfill('x') << 8 << " "
|
|
<< FloatProxy<double>(uint64_t(0x7FF0F00000000000LL)) << " " << std::setw(4)
|
|
<< 9;
|
|
EXPECT_THAT(s.str(), Eq(std::string("xx10 0x1.0fp+1024 xx11")));
|
|
}
|
|
|
|
TEST_P(DecodeHexFloatTest, DecodeCorrectly) {
|
|
EXPECT_THAT(Decode<float>(GetParam().first), Eq(GetParam().second));
|
|
}
|
|
|
|
TEST_P(DecodeHexDoubleTest, DecodeCorrectly) {
|
|
EXPECT_THAT(Decode<double>(GetParam().first), Eq(GetParam().second));
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
Float32DecodeTests, DecodeHexFloatTest,
|
|
::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
|
|
{"0x0p+000", 0.f},
|
|
{"0x0p0", 0.f},
|
|
{"0x0p-0", 0.f},
|
|
|
|
// flush to zero cases
|
|
{"0x1p-500", 0.f}, // Exponent underflows.
|
|
{"-0x1p-500", -0.f},
|
|
{"0x0.00000000001p-126", 0.f}, // Fraction causes underflow.
|
|
{"-0x0.0000000001p-127", -0.f},
|
|
{"-0x0.01p-142", -0.f}, // Fraction causes additional underflow.
|
|
{"0x0.01p-142", 0.f},
|
|
|
|
// Some floats that do not encode the same way as they decode.
|
|
{"0x2p+0", 2.f},
|
|
{"0xFFp+0", 255.f},
|
|
{"0x0.8p+0", 0.5f},
|
|
{"0x0.4p+0", 0.25f},
|
|
})), );
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
Float32DecodeInfTests, DecodeHexFloatTest,
|
|
::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
|
|
// inf cases
|
|
{"-0x1p+128", uint32_t(0xFF800000)}, // -inf
|
|
{"0x32p+127", uint32_t(0x7F800000)}, // inf
|
|
{"0x32p+500", uint32_t(0x7F800000)}, // inf
|
|
{"-0x32p+127", uint32_t(0xFF800000)}, // -inf
|
|
})), );
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
Float64DecodeTests, DecodeHexDoubleTest,
|
|
::testing::ValuesIn(
|
|
std::vector<std::pair<std::string, FloatProxy<double>>>({
|
|
{"0x0p+000", 0.},
|
|
{"0x0p0", 0.},
|
|
{"0x0p-0", 0.},
|
|
|
|
// flush to zero cases
|
|
{"0x1p-5000", 0.}, // Exponent underflows.
|
|
{"-0x1p-5000", -0.},
|
|
{"0x0.0000000000000001p-1023", 0.}, // Fraction causes underflow.
|
|
{"-0x0.000000000000001p-1024", -0.},
|
|
{"-0x0.01p-1090", -0.f}, // Fraction causes additional underflow.
|
|
{"0x0.01p-1090", 0.},
|
|
|
|
// Some floats that do not encode the same way as they decode.
|
|
{"0x2p+0", 2.},
|
|
{"0xFFp+0", 255.},
|
|
{"0x0.8p+0", 0.5},
|
|
{"0x0.4p+0", 0.25},
|
|
})), );
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
Float64DecodeInfTests, DecodeHexDoubleTest,
|
|
::testing::ValuesIn(
|
|
std::vector<std::pair<std::string, FloatProxy<double>>>({
|
|
// inf cases
|
|
{"-0x1p+1024", uint64_t(0xFFF0000000000000)}, // -inf
|
|
{"0x32p+1023", uint64_t(0x7FF0000000000000)}, // inf
|
|
{"0x32p+5000", uint64_t(0x7FF0000000000000)}, // inf
|
|
{"-0x32p+1023", uint64_t(0xFFF0000000000000)}, // -inf
|
|
})), );
|
|
|
|
TEST(FloatProxy, ValidConversion) {
|
|
EXPECT_THAT(FloatProxy<float>(1.f).getAsFloat(), Eq(1.0f));
|
|
EXPECT_THAT(FloatProxy<float>(32.f).getAsFloat(), Eq(32.0f));
|
|
EXPECT_THAT(FloatProxy<float>(-1.f).getAsFloat(), Eq(-1.0f));
|
|
EXPECT_THAT(FloatProxy<float>(0.f).getAsFloat(), Eq(0.0f));
|
|
EXPECT_THAT(FloatProxy<float>(-0.f).getAsFloat(), Eq(-0.0f));
|
|
EXPECT_THAT(FloatProxy<float>(1.2e32f).getAsFloat(), Eq(1.2e32f));
|
|
|
|
EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0xFF800000)).getAsFloat()));
|
|
EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0x7F800000)).getAsFloat()));
|
|
EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFC00000)).getAsFloat()));
|
|
EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800100)).getAsFloat()));
|
|
EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800c00)).getAsFloat()));
|
|
EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF80F000)).getAsFloat()));
|
|
EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFFFFFFF)).getAsFloat()));
|
|
EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FC00000)).getAsFloat()));
|
|
EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F800100)).getAsFloat()));
|
|
EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7f800c00)).getAsFloat()));
|
|
EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F80F000)).getAsFloat()));
|
|
EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FFFFFFF)).getAsFloat()));
|
|
|
|
EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800000)).data(), Eq(0xFF800000u));
|
|
EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800000)).data(), Eq(0x7F800000u));
|
|
EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFC00000)).data(), Eq(0xFFC00000u));
|
|
EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800100)).data(), Eq(0xFF800100u));
|
|
EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800c00)).data(), Eq(0xFF800c00u));
|
|
EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF80F000)).data(), Eq(0xFF80F000u));
|
|
EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFFFFFFF)).data(), Eq(0xFFFFFFFFu));
|
|
EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FC00000)).data(), Eq(0x7FC00000u));
|
|
EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800100)).data(), Eq(0x7F800100u));
|
|
EXPECT_THAT(FloatProxy<float>(uint32_t(0x7f800c00)).data(), Eq(0x7f800c00u));
|
|
EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F80F000)).data(), Eq(0x7F80F000u));
|
|
EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FFFFFFF)).data(), Eq(0x7FFFFFFFu));
|
|
}
|
|
|
|
TEST(FloatProxy, Nan) {
|
|
EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFC00000)).isNan());
|
|
EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800100)).isNan());
|
|
EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800c00)).isNan());
|
|
EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF80F000)).isNan());
|
|
EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFFFFFFF)).isNan());
|
|
EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FC00000)).isNan());
|
|
EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F800100)).isNan());
|
|
EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7f800c00)).isNan());
|
|
EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F80F000)).isNan());
|
|
EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FFFFFFF)).isNan());
|
|
}
|
|
|
|
TEST(FloatProxy, Negation) {
|
|
EXPECT_THAT((-FloatProxy<float>(1.f)).getAsFloat(), Eq(-1.0f));
|
|
EXPECT_THAT((-FloatProxy<float>(0.f)).getAsFloat(), Eq(-0.0f));
|
|
|
|
EXPECT_THAT((-FloatProxy<float>(-1.f)).getAsFloat(), Eq(1.0f));
|
|
EXPECT_THAT((-FloatProxy<float>(-0.f)).getAsFloat(), Eq(0.0f));
|
|
|
|
EXPECT_THAT((-FloatProxy<float>(32.f)).getAsFloat(), Eq(-32.0f));
|
|
EXPECT_THAT((-FloatProxy<float>(-32.f)).getAsFloat(), Eq(32.0f));
|
|
|
|
EXPECT_THAT((-FloatProxy<float>(1.2e32f)).getAsFloat(), Eq(-1.2e32f));
|
|
EXPECT_THAT((-FloatProxy<float>(-1.2e32f)).getAsFloat(), Eq(1.2e32f));
|
|
|
|
EXPECT_THAT(
|
|
(-FloatProxy<float>(std::numeric_limits<float>::infinity())).getAsFloat(),
|
|
Eq(-std::numeric_limits<float>::infinity()));
|
|
EXPECT_THAT((-FloatProxy<float>(-std::numeric_limits<float>::infinity()))
|
|
.getAsFloat(),
|
|
Eq(std::numeric_limits<float>::infinity()));
|
|
}
|
|
|
|
// Test conversion of FloatProxy values to strings.
|
|
//
|
|
// In previous cases, we always wrapped the FloatProxy value in a HexFloat
|
|
// before conversion to a string. In the following cases, the FloatProxy
|
|
// decides for itself whether to print as a regular number or as a hex float.
|
|
|
|
using FloatProxyFloatTest =
|
|
::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
|
|
using FloatProxyDoubleTest =
|
|
::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
|
|
|
|
// Converts a float value to a string via a FloatProxy.
|
|
template <typename T>
|
|
std::string EncodeViaFloatProxy(const T& value) {
|
|
std::stringstream ss;
|
|
ss << value;
|
|
return ss.str();
|
|
}
|
|
|
|
// Converts a floating point string so that the exponent prefix
|
|
// is 'e', and the exponent value does not have leading zeros.
|
|
// The Microsoft runtime library likes to write things like "2.5E+010".
|
|
// Convert that to "2.5e+10".
|
|
// We don't care what happens to strings that are not floating point
|
|
// strings.
|
|
std::string NormalizeExponentInFloatString(std::string in) {
|
|
std::string result;
|
|
// Reserve one spot for the terminating null, even when the sscanf fails.
|
|
std::vector<char> prefix(in.size() + 1);
|
|
char e;
|
|
char plus_or_minus;
|
|
int exponent; // in base 10
|
|
if ((4 == std::sscanf(in.c_str(), "%[-+.0123456789]%c%c%d", prefix.data(), &e,
|
|
&plus_or_minus, &exponent)) &&
|
|
(e == 'e' || e == 'E') &&
|
|
(plus_or_minus == '-' || plus_or_minus == '+')) {
|
|
// It looks like a floating point value with exponent.
|
|
std::stringstream out;
|
|
out << prefix.data() << 'e' << plus_or_minus << exponent;
|
|
result = out.str();
|
|
} else {
|
|
result = in;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
TEST(NormalizeFloat, Sample) {
|
|
EXPECT_THAT(NormalizeExponentInFloatString(""), Eq(""));
|
|
EXPECT_THAT(NormalizeExponentInFloatString("1e-12"), Eq("1e-12"));
|
|
EXPECT_THAT(NormalizeExponentInFloatString("1E+14"), Eq("1e+14"));
|
|
EXPECT_THAT(NormalizeExponentInFloatString("1e-0012"), Eq("1e-12"));
|
|
EXPECT_THAT(NormalizeExponentInFloatString("1.263E+014"), Eq("1.263e+14"));
|
|
}
|
|
|
|
// The following two tests can't be DRY because they take different parameter
|
|
// types.
|
|
TEST_P(FloatProxyFloatTest, EncodeCorrectly) {
|
|
EXPECT_THAT(
|
|
NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)),
|
|
Eq(GetParam().second));
|
|
}
|
|
|
|
TEST_P(FloatProxyDoubleTest, EncodeCorrectly) {
|
|
EXPECT_THAT(
|
|
NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)),
|
|
Eq(GetParam().second));
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
Float32Tests, FloatProxyFloatTest,
|
|
::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
|
|
// Zero
|
|
{0.f, "0"},
|
|
// Normal numbers
|
|
{1.f, "1"},
|
|
{-0.25f, "-0.25"},
|
|
{1000.0f, "1000"},
|
|
|
|
// Still normal numbers, but with large magnitude exponents.
|
|
{float(ldexp(1.f, 126)), "8.50705917e+37"},
|
|
{float(ldexp(-1.f, -126)), "-1.17549435e-38"},
|
|
|
|
// denormalized values are printed as hex floats.
|
|
{float(ldexp(1.0f, -127)), "0x1p-127"},
|
|
{float(ldexp(1.5f, -128)), "0x1.8p-128"},
|
|
{float(ldexp(1.25, -129)), "0x1.4p-129"},
|
|
{float(ldexp(1.125, -130)), "0x1.2p-130"},
|
|
{float(ldexp(-1.0f, -127)), "-0x1p-127"},
|
|
{float(ldexp(-1.0f, -128)), "-0x1p-128"},
|
|
{float(ldexp(-1.0f, -129)), "-0x1p-129"},
|
|
{float(ldexp(-1.5f, -130)), "-0x1.8p-130"},
|
|
|
|
// NaNs
|
|
{FloatProxy<float>(uint32_t(0xFFC00000)), "-0x1.8p+128"},
|
|
{FloatProxy<float>(uint32_t(0xFF800100)), "-0x1.0002p+128"},
|
|
|
|
{std::numeric_limits<float>::infinity(), "0x1p+128"},
|
|
{-std::numeric_limits<float>::infinity(), "-0x1p+128"},
|
|
})), );
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
Float64Tests, FloatProxyDoubleTest,
|
|
::testing::ValuesIn(
|
|
std::vector<std::pair<FloatProxy<double>, std::string>>({
|
|
{0., "0"},
|
|
{1., "1"},
|
|
{-0.25, "-0.25"},
|
|
{1000.0, "1000"},
|
|
|
|
// Large outside the range of normal floats
|
|
{ldexp(1.0, 128), "3.4028236692093846e+38"},
|
|
{ldexp(1.5, 129), "1.0208471007628154e+39"},
|
|
{ldexp(-1.0, 128), "-3.4028236692093846e+38"},
|
|
{ldexp(-1.5, 129), "-1.0208471007628154e+39"},
|
|
|
|
// Small outside the range of normal floats
|
|
{ldexp(1.5, -129), "2.2040519077917891e-39"},
|
|
{ldexp(-1.5, -129), "-2.2040519077917891e-39"},
|
|
|
|
// lowest non-denorm
|
|
{ldexp(1.0, -1022), "2.2250738585072014e-308"},
|
|
{ldexp(-1.0, -1022), "-2.2250738585072014e-308"},
|
|
|
|
// Denormalized values
|
|
{ldexp(1.125, -1023), "0x1.2p-1023"},
|
|
{ldexp(-1.375, -1024), "-0x1.6p-1024"},
|
|
|
|
// NaNs
|
|
{uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"},
|
|
{uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"},
|
|
|
|
// Infinity
|
|
{std::numeric_limits<double>::infinity(), "0x1p+1024"},
|
|
{-std::numeric_limits<double>::infinity(), "-0x1p+1024"},
|
|
|
|
})), );
|
|
|
|
// double is used so that unbiased_exponent can be used with the output
|
|
// of ldexp directly.
|
|
int32_t unbiased_exponent(double f) {
|
|
return spvutils::HexFloat<spvutils::FloatProxy<float>>(static_cast<float>(f))
|
|
.getUnbiasedNormalizedExponent();
|
|
}
|
|
|
|
int16_t unbiased_half_exponent(uint16_t f) {
|
|
return spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>(f)
|
|
.getUnbiasedNormalizedExponent();
|
|
}
|
|
|
|
TEST(HexFloatOperationTest, UnbiasedExponent) {
|
|
// Float cases
|
|
EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, 0)));
|
|
EXPECT_EQ(-32, unbiased_exponent(ldexp(1.0f, -32)));
|
|
EXPECT_EQ(42, unbiased_exponent(ldexp(1.0f, 42)));
|
|
EXPECT_EQ(125, unbiased_exponent(ldexp(1.0f, 125)));
|
|
|
|
EXPECT_EQ(128, spvutils::HexFloat<spvutils::FloatProxy<float>>(
|
|
std::numeric_limits<float>::infinity())
|
|
.getUnbiasedNormalizedExponent());
|
|
|
|
EXPECT_EQ(-100, unbiased_exponent(ldexp(1.0f, -100)));
|
|
EXPECT_EQ(-127, unbiased_exponent(ldexp(1.0f, -127))); // First denorm
|
|
EXPECT_EQ(-128, unbiased_exponent(ldexp(1.0f, -128)));
|
|
EXPECT_EQ(-129, unbiased_exponent(ldexp(1.0f, -129)));
|
|
EXPECT_EQ(-140, unbiased_exponent(ldexp(1.0f, -140)));
|
|
// Smallest representable number
|
|
EXPECT_EQ(-126 - 23, unbiased_exponent(ldexp(1.0f, -126 - 23)));
|
|
// Should get rounded to 0 first.
|
|
EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, -127 - 23)));
|
|
|
|
// Float16 cases
|
|
// The exponent is represented in the bits 0x7C00
|
|
// The offset is -15
|
|
EXPECT_EQ(0, unbiased_half_exponent(0x3C00));
|
|
EXPECT_EQ(3, unbiased_half_exponent(0x4800));
|
|
EXPECT_EQ(-1, unbiased_half_exponent(0x3800));
|
|
EXPECT_EQ(-14, unbiased_half_exponent(0x0400));
|
|
EXPECT_EQ(16, unbiased_half_exponent(0x7C00));
|
|
EXPECT_EQ(10, unbiased_half_exponent(0x6400));
|
|
|
|
// Smallest representable number
|
|
EXPECT_EQ(-24, unbiased_half_exponent(0x0001));
|
|
}
|
|
|
|
// Creates a float that is the sum of 1/(2 ^ fractions[i]) for i in factions
|
|
float float_fractions(const std::vector<uint32_t>& fractions) {
|
|
float f = 0;
|
|
for (int32_t i : fractions) {
|
|
f += std::ldexp(1.0f, -i);
|
|
}
|
|
return f;
|
|
}
|
|
|
|
// Returns the normalized significand of a HexFloat<FloatProxy<float>>
|
|
// that was created by calling float_fractions with the input fractions,
|
|
// raised to the power of exp.
|
|
uint32_t normalized_significand(const std::vector<uint32_t>& fractions,
|
|
uint32_t exp) {
|
|
return spvutils::HexFloat<spvutils::FloatProxy<float>>(
|
|
static_cast<float>(ldexp(float_fractions(fractions), exp)))
|
|
.getNormalizedSignificand();
|
|
}
|
|
|
|
// Sets the bits from MSB to LSB of the significand part of a float.
|
|
// For example 0 would set the bit 23 (counting from LSB to MSB),
|
|
// and 1 would set the 22nd bit.
|
|
uint32_t bits_set(const std::vector<uint32_t>& bits) {
|
|
const uint32_t top_bit = 1u << 22u;
|
|
uint32_t val = 0;
|
|
for (uint32_t i : bits) {
|
|
val |= top_bit >> i;
|
|
}
|
|
return val;
|
|
}
|
|
|
|
// The same as bits_set but for a Float16 value instead of 32-bit floating
|
|
// point.
|
|
uint16_t half_bits_set(const std::vector<uint32_t>& bits) {
|
|
const uint32_t top_bit = 1u << 9u;
|
|
uint32_t val = 0;
|
|
for (uint32_t i : bits) {
|
|
val |= top_bit >> i;
|
|
}
|
|
return static_cast<uint16_t>(val);
|
|
}
|
|
|
|
TEST(HexFloatOperationTest, NormalizedSignificand) {
|
|
// For normalized numbers (the following) it should be a simple matter
|
|
// of getting rid of the top implicit bit
|
|
EXPECT_EQ(bits_set({}), normalized_significand({0}, 0));
|
|
EXPECT_EQ(bits_set({0}), normalized_significand({0, 1}, 0));
|
|
EXPECT_EQ(bits_set({0, 1}), normalized_significand({0, 1, 2}, 0));
|
|
EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 0));
|
|
EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 32));
|
|
EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 126));
|
|
|
|
// For denormalized numbers we expect the normalized significand to
|
|
// shift as if it were normalized. This means, in practice that the
|
|
// top_most set bit will be cut off. Looks very similar to above (on purpose)
|
|
EXPECT_EQ(bits_set({}), normalized_significand({0}, -127));
|
|
EXPECT_EQ(bits_set({3}), normalized_significand({0, 4}, -128));
|
|
EXPECT_EQ(bits_set({3}), normalized_significand({0, 4}, -127));
|
|
EXPECT_EQ(bits_set({}), normalized_significand({22}, -127));
|
|
EXPECT_EQ(bits_set({0}), normalized_significand({21, 22}, -127));
|
|
}
|
|
|
|
// Returns the 32-bit floating point value created by
|
|
// calling setFromSignUnbiasedExponentAndNormalizedSignificand
|
|
// on a HexFloat<FloatProxy<float>>
|
|
float set_from_sign(bool negative, int32_t unbiased_exponent,
|
|
uint32_t significand, bool round_denorm_up) {
|
|
spvutils::HexFloat<spvutils::FloatProxy<float>> f(0.f);
|
|
f.setFromSignUnbiasedExponentAndNormalizedSignificand(
|
|
negative, unbiased_exponent, significand, round_denorm_up);
|
|
return f.value().getAsFloat();
|
|
}
|
|
|
|
TEST(HexFloatOperationTests,
|
|
SetFromSignUnbiasedExponentAndNormalizedSignificand) {
|
|
EXPECT_EQ(1.f, set_from_sign(false, 0, 0, false));
|
|
|
|
// Tests insertion of various denormalized numbers with and without round up.
|
|
EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)),
|
|
set_from_sign(false, -149, 0, false));
|
|
EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)),
|
|
set_from_sign(false, -149, 0, true));
|
|
EXPECT_EQ(0.f, set_from_sign(false, -150, 1, false));
|
|
EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)),
|
|
set_from_sign(false, -150, 1, true));
|
|
|
|
EXPECT_EQ(ldexp(1.0f, -127), set_from_sign(false, -127, 0, false));
|
|
EXPECT_EQ(ldexp(1.0f, -128), set_from_sign(false, -128, 0, false));
|
|
EXPECT_EQ(float_fractions({0, 1, 2, 5}),
|
|
set_from_sign(false, 0, bits_set({0, 1, 4}), false));
|
|
EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -32),
|
|
set_from_sign(false, -32, bits_set({0, 1, 4}), false));
|
|
EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -128),
|
|
set_from_sign(false, -128, bits_set({0, 1, 4}), false));
|
|
|
|
// The negative cases from above.
|
|
EXPECT_EQ(-1.f, set_from_sign(true, 0, 0, false));
|
|
EXPECT_EQ(-ldexp(1.0, -127), set_from_sign(true, -127, 0, false));
|
|
EXPECT_EQ(-ldexp(1.0, -128), set_from_sign(true, -128, 0, false));
|
|
EXPECT_EQ(-float_fractions({0, 1, 2, 5}),
|
|
set_from_sign(true, 0, bits_set({0, 1, 4}), false));
|
|
EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -32),
|
|
set_from_sign(true, -32, bits_set({0, 1, 4}), false));
|
|
EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -128),
|
|
set_from_sign(true, -128, bits_set({0, 1, 4}), false));
|
|
}
|
|
|
|
TEST(HexFloatOperationTests, NonRounding) {
|
|
// Rounding from 32-bit hex-float to 32-bit hex-float should be trivial,
|
|
// except in the denorm case which is a bit more complex.
|
|
using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
|
|
bool carry_bit = false;
|
|
|
|
spvutils::round_direction rounding[] = {
|
|
spvutils::round_direction::kToZero,
|
|
spvutils::round_direction::kToNearestEven,
|
|
spvutils::round_direction::kToPositiveInfinity,
|
|
spvutils::round_direction::kToNegativeInfinity};
|
|
|
|
// Everything fits, so this should be straight-forward
|
|
for (spvutils::round_direction round : rounding) {
|
|
EXPECT_EQ(bits_set({}),
|
|
HF(0.f).getRoundedNormalizedSignificand<HF>(round, &carry_bit));
|
|
EXPECT_FALSE(carry_bit);
|
|
|
|
EXPECT_EQ(bits_set({0}),
|
|
HF(float_fractions({0, 1}))
|
|
.getRoundedNormalizedSignificand<HF>(round, &carry_bit));
|
|
EXPECT_FALSE(carry_bit);
|
|
|
|
EXPECT_EQ(bits_set({1, 3}),
|
|
HF(float_fractions({0, 2, 4}))
|
|
.getRoundedNormalizedSignificand<HF>(round, &carry_bit));
|
|
EXPECT_FALSE(carry_bit);
|
|
|
|
EXPECT_EQ(
|
|
bits_set({0, 1, 4}),
|
|
HF(static_cast<float>(-ldexp(float_fractions({0, 1, 2, 5}), -128)))
|
|
.getRoundedNormalizedSignificand<HF>(round, &carry_bit));
|
|
EXPECT_FALSE(carry_bit);
|
|
|
|
EXPECT_EQ(bits_set({0, 1, 4, 22}),
|
|
HF(static_cast<float>(float_fractions({0, 1, 2, 5, 23})))
|
|
.getRoundedNormalizedSignificand<HF>(round, &carry_bit));
|
|
EXPECT_FALSE(carry_bit);
|
|
}
|
|
}
|
|
|
|
using RD = spvutils::round_direction;
|
|
struct RoundSignificandCase {
|
|
float source_float;
|
|
std::pair<int16_t, bool> expected_results;
|
|
spvutils::round_direction round;
|
|
};
|
|
|
|
using HexFloatRoundTest = ::testing::TestWithParam<RoundSignificandCase>;
|
|
|
|
TEST_P(HexFloatRoundTest, RoundDownToFP16) {
|
|
using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
|
|
using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
|
|
|
|
HF input_value(GetParam().source_float);
|
|
bool carry_bit = false;
|
|
EXPECT_EQ(GetParam().expected_results.first,
|
|
input_value.getRoundedNormalizedSignificand<HF16>(GetParam().round,
|
|
&carry_bit));
|
|
EXPECT_EQ(carry_bit, GetParam().expected_results.second);
|
|
}
|
|
|
|
// clang-format off
|
|
INSTANTIATE_TEST_CASE_P(F32ToF16, HexFloatRoundTest,
|
|
::testing::ValuesIn(std::vector<RoundSignificandCase>(
|
|
{
|
|
{float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToZero},
|
|
{float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToNearestEven},
|
|
{float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToPositiveInfinity},
|
|
{float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToNegativeInfinity},
|
|
{float_fractions({0, 1}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
|
|
|
|
{float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
|
|
{float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
|
|
{float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
|
|
{float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToNearestEven},
|
|
|
|
{float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToZero},
|
|
{float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), RD::kToPositiveInfinity},
|
|
{float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNegativeInfinity},
|
|
{float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), RD::kToNearestEven},
|
|
|
|
{float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
|
|
{float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
|
|
{float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
|
|
{float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
|
|
|
|
{-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
|
|
{-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToPositiveInfinity},
|
|
{-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNegativeInfinity},
|
|
{-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
|
|
|
|
{float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
|
|
{float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
|
|
{float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
|
|
{float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
|
|
|
|
// Carries
|
|
{float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), RD::kToZero},
|
|
{float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), RD::kToPositiveInfinity},
|
|
{float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), RD::kToNegativeInfinity},
|
|
{float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), RD::kToNearestEven},
|
|
|
|
// Cases where original number was denorm. Note: this should have no effect
|
|
// the number is pre-normalized.
|
|
{static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -128)), std::make_pair(half_bits_set({0}), false), RD::kToZero},
|
|
{static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -129)), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
|
|
{static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -131)), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
|
|
{static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -130)), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
|
|
})),);
|
|
// clang-format on
|
|
|
|
struct UpCastSignificandCase {
|
|
uint16_t source_half;
|
|
uint32_t expected_result;
|
|
};
|
|
|
|
using HexFloatRoundUpSignificandTest =
|
|
::testing::TestWithParam<UpCastSignificandCase>;
|
|
TEST_P(HexFloatRoundUpSignificandTest, Widening) {
|
|
using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
|
|
using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
|
|
bool carry_bit = false;
|
|
|
|
spvutils::round_direction rounding[] = {
|
|
spvutils::round_direction::kToZero,
|
|
spvutils::round_direction::kToNearestEven,
|
|
spvutils::round_direction::kToPositiveInfinity,
|
|
spvutils::round_direction::kToNegativeInfinity};
|
|
|
|
// Everything fits, so everything should just be bit-shifts.
|
|
for (spvutils::round_direction round : rounding) {
|
|
carry_bit = false;
|
|
HF16 input_value(GetParam().source_half);
|
|
EXPECT_EQ(
|
|
GetParam().expected_result,
|
|
input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit))
|
|
<< std::hex << "0x"
|
|
<< input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit)
|
|
<< " 0x" << GetParam().expected_result;
|
|
EXPECT_FALSE(carry_bit);
|
|
}
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
F16toF32, HexFloatRoundUpSignificandTest,
|
|
// 0xFC00 of the source 16-bit hex value cover the sign and the exponent.
|
|
// They are ignored for this test.
|
|
::testing::ValuesIn(std::vector<UpCastSignificandCase>({
|
|
{0x3F00, 0x600000},
|
|
{0x0F00, 0x600000},
|
|
{0x0F01, 0x602000},
|
|
{0x0FFF, 0x7FE000},
|
|
})), );
|
|
|
|
struct DownCastTest {
|
|
float source_float;
|
|
uint16_t expected_half;
|
|
std::vector<spvutils::round_direction> directions;
|
|
};
|
|
|
|
std::string get_round_text(spvutils::round_direction direction) {
|
|
#define CASE(round_direction) \
|
|
case round_direction: \
|
|
return #round_direction
|
|
|
|
switch (direction) {
|
|
CASE(spvutils::round_direction::kToZero);
|
|
CASE(spvutils::round_direction::kToPositiveInfinity);
|
|
CASE(spvutils::round_direction::kToNegativeInfinity);
|
|
CASE(spvutils::round_direction::kToNearestEven);
|
|
}
|
|
#undef CASE
|
|
return "";
|
|
}
|
|
|
|
using HexFloatFP32To16Tests = ::testing::TestWithParam<DownCastTest>;
|
|
|
|
TEST_P(HexFloatFP32To16Tests, NarrowingCasts) {
|
|
using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
|
|
using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
|
|
HF f(GetParam().source_float);
|
|
for (auto round : GetParam().directions) {
|
|
HF16 half(0);
|
|
f.castTo(half, round);
|
|
EXPECT_EQ(GetParam().expected_half, half.value().getAsFloat().get_value())
|
|
<< get_round_text(round) << " " << std::hex
|
|
<< spvutils::BitwiseCast<uint32_t>(GetParam().source_float)
|
|
<< " cast to: " << half.value().getAsFloat().get_value();
|
|
}
|
|
}
|
|
|
|
const uint16_t positive_infinity = 0x7C00;
|
|
const uint16_t negative_infinity = 0xFC00;
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
F32ToF16, HexFloatFP32To16Tests,
|
|
::testing::ValuesIn(std::vector<DownCastTest>({
|
|
// Exactly representable as half.
|
|
{0.f,
|
|
0x0,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
|
|
RD::kToNearestEven}},
|
|
{-0.f,
|
|
0x8000,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
|
|
RD::kToNearestEven}},
|
|
{1.0f,
|
|
0x3C00,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
|
|
RD::kToNearestEven}},
|
|
{-1.0f,
|
|
0xBC00,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
|
|
RD::kToNearestEven}},
|
|
|
|
{float_fractions({0, 1, 10}),
|
|
0x3E01,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
|
|
RD::kToNearestEven}},
|
|
{-float_fractions({0, 1, 10}),
|
|
0xBE01,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
|
|
RD::kToNearestEven}},
|
|
{static_cast<float>(ldexp(float_fractions({0, 1, 10}), 3)),
|
|
0x4A01,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
|
|
RD::kToNearestEven}},
|
|
{static_cast<float>(-ldexp(float_fractions({0, 1, 10}), 3)),
|
|
0xCA01,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
|
|
RD::kToNearestEven}},
|
|
|
|
// Underflow
|
|
{static_cast<float>(ldexp(1.0f, -25)),
|
|
0x0,
|
|
{RD::kToZero, RD::kToNegativeInfinity, RD::kToNearestEven}},
|
|
{static_cast<float>(ldexp(1.0f, -25)), 0x1, {RD::kToPositiveInfinity}},
|
|
{static_cast<float>(-ldexp(1.0f, -25)),
|
|
0x8000,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNearestEven}},
|
|
{static_cast<float>(-ldexp(1.0f, -25)),
|
|
0x8001,
|
|
{RD::kToNegativeInfinity}},
|
|
{static_cast<float>(ldexp(1.0f, -24)),
|
|
0x1,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
|
|
RD::kToNearestEven}},
|
|
|
|
// Overflow
|
|
{static_cast<float>(ldexp(1.0f, 16)),
|
|
positive_infinity,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
|
|
RD::kToNearestEven}},
|
|
{static_cast<float>(ldexp(1.0f, 18)),
|
|
positive_infinity,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
|
|
RD::kToNearestEven}},
|
|
{static_cast<float>(ldexp(1.3f, 16)),
|
|
positive_infinity,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
|
|
RD::kToNearestEven}},
|
|
{static_cast<float>(-ldexp(1.0f, 16)),
|
|
negative_infinity,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
|
|
RD::kToNearestEven}},
|
|
{static_cast<float>(-ldexp(1.0f, 18)),
|
|
negative_infinity,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
|
|
RD::kToNearestEven}},
|
|
{static_cast<float>(-ldexp(1.3f, 16)),
|
|
negative_infinity,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
|
|
RD::kToNearestEven}},
|
|
|
|
// Transfer of Infinities
|
|
{std::numeric_limits<float>::infinity(),
|
|
positive_infinity,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
|
|
RD::kToNearestEven}},
|
|
{-std::numeric_limits<float>::infinity(),
|
|
negative_infinity,
|
|
{RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
|
|
RD::kToNearestEven}},
|
|
|
|
// Nans are below because we cannot test for equality.
|
|
})), );
|
|
|
|
struct UpCastCase {
|
|
uint16_t source_half;
|
|
float expected_float;
|
|
};
|
|
|
|
using HexFloatFP16To32Tests = ::testing::TestWithParam<UpCastCase>;
|
|
TEST_P(HexFloatFP16To32Tests, WideningCasts) {
|
|
using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
|
|
using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
|
|
HF16 f(GetParam().source_half);
|
|
|
|
spvutils::round_direction rounding[] = {
|
|
spvutils::round_direction::kToZero,
|
|
spvutils::round_direction::kToNearestEven,
|
|
spvutils::round_direction::kToPositiveInfinity,
|
|
spvutils::round_direction::kToNegativeInfinity};
|
|
|
|
// Everything fits, so everything should just be bit-shifts.
|
|
for (spvutils::round_direction round : rounding) {
|
|
HF flt(0.f);
|
|
f.castTo(flt, round);
|
|
EXPECT_EQ(GetParam().expected_float, flt.value().getAsFloat())
|
|
<< get_round_text(round) << " " << std::hex
|
|
<< spvutils::BitwiseCast<uint16_t>(GetParam().source_half)
|
|
<< " cast to: " << flt.value().getAsFloat();
|
|
}
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
F16ToF32, HexFloatFP16To32Tests,
|
|
::testing::ValuesIn(std::vector<UpCastCase>({
|
|
{0x0000, 0.f},
|
|
{0x8000, -0.f},
|
|
{0x3C00, 1.0f},
|
|
{0xBC00, -1.0f},
|
|
{0x3F00, float_fractions({0, 1, 2})},
|
|
{0xBF00, -float_fractions({0, 1, 2})},
|
|
{0x3F01, float_fractions({0, 1, 2, 10})},
|
|
{0xBF01, -float_fractions({0, 1, 2, 10})},
|
|
|
|
// denorm
|
|
{0x0001, static_cast<float>(ldexp(1.0, -24))},
|
|
{0x0002, static_cast<float>(ldexp(1.0, -23))},
|
|
{0x8001, static_cast<float>(-ldexp(1.0, -24))},
|
|
{0x8011, static_cast<float>(-ldexp(1.0, -20) + -ldexp(1.0, -24))},
|
|
|
|
// inf
|
|
{0x7C00, std::numeric_limits<float>::infinity()},
|
|
{0xFC00, -std::numeric_limits<float>::infinity()},
|
|
})), );
|
|
|
|
TEST(HexFloatOperationTests, NanTests) {
|
|
using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
|
|
using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
|
|
spvutils::round_direction rounding[] = {
|
|
spvutils::round_direction::kToZero,
|
|
spvutils::round_direction::kToNearestEven,
|
|
spvutils::round_direction::kToPositiveInfinity,
|
|
spvutils::round_direction::kToNegativeInfinity};
|
|
|
|
// Everything fits, so everything should just be bit-shifts.
|
|
for (spvutils::round_direction round : rounding) {
|
|
HF16 f16(0);
|
|
HF f(0.f);
|
|
HF(std::numeric_limits<float>::quiet_NaN()).castTo(f16, round);
|
|
EXPECT_TRUE(f16.value().isNan());
|
|
HF(std::numeric_limits<float>::signaling_NaN()).castTo(f16, round);
|
|
EXPECT_TRUE(f16.value().isNan());
|
|
|
|
HF16(0x7C01).castTo(f, round);
|
|
EXPECT_TRUE(f.value().isNan());
|
|
HF16(0x7C11).castTo(f, round);
|
|
EXPECT_TRUE(f.value().isNan());
|
|
HF16(0xFC01).castTo(f, round);
|
|
EXPECT_TRUE(f.value().isNan());
|
|
HF16(0x7C10).castTo(f, round);
|
|
EXPECT_TRUE(f.value().isNan());
|
|
HF16(0xFF00).castTo(f, round);
|
|
EXPECT_TRUE(f.value().isNan());
|
|
}
|
|
}
|
|
|
|
// A test case for parsing good and bad HexFloat<FloatProxy<T>> literals.
|
|
template <typename T>
|
|
struct FloatParseCase {
|
|
std::string literal;
|
|
bool negate_value;
|
|
bool expect_success;
|
|
HexFloat<FloatProxy<T>> expected_value;
|
|
};
|
|
|
|
using ParseNormalFloatTest = ::testing::TestWithParam<FloatParseCase<float>>;
|
|
|
|
TEST_P(ParseNormalFloatTest, Samples) {
|
|
std::stringstream input(GetParam().literal);
|
|
HexFloat<FloatProxy<float>> parsed_value(0.0f);
|
|
ParseNormalFloat(input, GetParam().negate_value, parsed_value);
|
|
EXPECT_NE(GetParam().expect_success, input.fail())
|
|
<< " literal: " << GetParam().literal
|
|
<< " negate: " << GetParam().negate_value;
|
|
if (GetParam().expect_success) {
|
|
EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
|
|
<< " literal: " << GetParam().literal
|
|
<< " negate: " << GetParam().negate_value;
|
|
}
|
|
}
|
|
|
|
// Returns a FloatParseCase with expected failure.
|
|
template <typename T>
|
|
FloatParseCase<T> BadFloatParseCase(std::string literal, bool negate_value,
|
|
T expected_value) {
|
|
HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
|
|
return FloatParseCase<T>{literal, negate_value, false, proxy_expected_value};
|
|
}
|
|
|
|
// Returns a FloatParseCase that should successfully parse to a given value.
|
|
template <typename T>
|
|
FloatParseCase<T> GoodFloatParseCase(std::string literal, bool negate_value,
|
|
T expected_value) {
|
|
HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
|
|
return FloatParseCase<T>{literal, negate_value, true, proxy_expected_value};
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
FloatParse, ParseNormalFloatTest,
|
|
::testing::ValuesIn(std::vector<FloatParseCase<float>>{
|
|
// Failing cases due to trivially incorrect syntax.
|
|
BadFloatParseCase("abc", false, 0.0f),
|
|
BadFloatParseCase("abc", true, 0.0f),
|
|
|
|
// Valid cases.
|
|
GoodFloatParseCase("0", false, 0.0f),
|
|
GoodFloatParseCase("0.0", false, 0.0f),
|
|
GoodFloatParseCase("-0.0", false, -0.0f),
|
|
GoodFloatParseCase("2.0", false, 2.0f),
|
|
GoodFloatParseCase("-2.0", false, -2.0f),
|
|
GoodFloatParseCase("+2.0", false, 2.0f),
|
|
// Cases with negate_value being true.
|
|
GoodFloatParseCase("0.0", true, -0.0f),
|
|
GoodFloatParseCase("2.0", true, -2.0f),
|
|
|
|
// When negate_value is true, we should not accept a
|
|
// leading minus or plus.
|
|
BadFloatParseCase("-0.0", true, 0.0f),
|
|
BadFloatParseCase("-2.0", true, 0.0f),
|
|
BadFloatParseCase("+0.0", true, 0.0f),
|
|
BadFloatParseCase("+2.0", true, 0.0f),
|
|
|
|
// Overflow is an error for 32-bit float parsing.
|
|
BadFloatParseCase("1e40", false, FLT_MAX),
|
|
BadFloatParseCase("1e40", true, -FLT_MAX),
|
|
BadFloatParseCase("-1e40", false, -FLT_MAX),
|
|
// We can't have -1e40 and negate_value == true since
|
|
// that represents an original case of "--1e40" which
|
|
// is invalid.
|
|
}), );
|
|
|
|
using ParseNormalFloat16Test =
|
|
::testing::TestWithParam<FloatParseCase<Float16>>;
|
|
|
|
TEST_P(ParseNormalFloat16Test, Samples) {
|
|
std::stringstream input(GetParam().literal);
|
|
HexFloat<FloatProxy<Float16>> parsed_value(0);
|
|
ParseNormalFloat(input, GetParam().negate_value, parsed_value);
|
|
EXPECT_NE(GetParam().expect_success, input.fail())
|
|
<< " literal: " << GetParam().literal
|
|
<< " negate: " << GetParam().negate_value;
|
|
if (GetParam().expect_success) {
|
|
EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
|
|
<< " literal: " << GetParam().literal
|
|
<< " negate: " << GetParam().negate_value;
|
|
}
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
Float16Parse, ParseNormalFloat16Test,
|
|
::testing::ValuesIn(std::vector<FloatParseCase<Float16>>{
|
|
// Failing cases due to trivially incorrect syntax.
|
|
BadFloatParseCase<Float16>("abc", false, uint16_t{0}),
|
|
BadFloatParseCase<Float16>("abc", true, uint16_t{0}),
|
|
|
|
// Valid cases.
|
|
GoodFloatParseCase<Float16>("0", false, uint16_t{0}),
|
|
GoodFloatParseCase<Float16>("0.0", false, uint16_t{0}),
|
|
GoodFloatParseCase<Float16>("-0.0", false, uint16_t{0x8000}),
|
|
GoodFloatParseCase<Float16>("2.0", false, uint16_t{0x4000}),
|
|
GoodFloatParseCase<Float16>("-2.0", false, uint16_t{0xc000}),
|
|
GoodFloatParseCase<Float16>("+2.0", false, uint16_t{0x4000}),
|
|
// Cases with negate_value being true.
|
|
GoodFloatParseCase<Float16>("0.0", true, uint16_t{0x8000}),
|
|
GoodFloatParseCase<Float16>("2.0", true, uint16_t{0xc000}),
|
|
|
|
// When negate_value is true, we should not accept a leading minus or
|
|
// plus.
|
|
BadFloatParseCase<Float16>("-0.0", true, uint16_t{0}),
|
|
BadFloatParseCase<Float16>("-2.0", true, uint16_t{0}),
|
|
BadFloatParseCase<Float16>("+0.0", true, uint16_t{0}),
|
|
BadFloatParseCase<Float16>("+2.0", true, uint16_t{0}),
|
|
}), );
|
|
|
|
// A test case for detecting infinities.
|
|
template <typename T>
|
|
struct OverflowParseCase {
|
|
std::string input;
|
|
bool expect_success;
|
|
T expected_value;
|
|
};
|
|
|
|
using FloatProxyParseOverflowFloatTest =
|
|
::testing::TestWithParam<OverflowParseCase<float>>;
|
|
|
|
TEST_P(FloatProxyParseOverflowFloatTest, Sample) {
|
|
std::istringstream input(GetParam().input);
|
|
HexFloat<FloatProxy<float>> value(0.0f);
|
|
input >> value;
|
|
EXPECT_NE(GetParam().expect_success, input.fail());
|
|
if (GetParam().expect_success) {
|
|
EXPECT_THAT(value.value().getAsFloat(), GetParam().expected_value);
|
|
}
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
FloatOverflow, FloatProxyParseOverflowFloatTest,
|
|
::testing::ValuesIn(std::vector<OverflowParseCase<float>>({
|
|
{"0", true, 0.0f},
|
|
{"0.0", true, 0.0f},
|
|
{"1.0", true, 1.0f},
|
|
{"1e38", true, 1e38f},
|
|
{"-1e38", true, -1e38f},
|
|
{"1e40", false, FLT_MAX},
|
|
{"-1e40", false, -FLT_MAX},
|
|
{"1e400", false, FLT_MAX},
|
|
{"-1e400", false, -FLT_MAX},
|
|
})), );
|
|
|
|
using FloatProxyParseOverflowDoubleTest =
|
|
::testing::TestWithParam<OverflowParseCase<double>>;
|
|
|
|
TEST_P(FloatProxyParseOverflowDoubleTest, Sample) {
|
|
std::istringstream input(GetParam().input);
|
|
HexFloat<FloatProxy<double>> value(0.0);
|
|
input >> value;
|
|
EXPECT_NE(GetParam().expect_success, input.fail());
|
|
if (GetParam().expect_success) {
|
|
EXPECT_THAT(value.value().getAsFloat(), Eq(GetParam().expected_value));
|
|
}
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
DoubleOverflow, FloatProxyParseOverflowDoubleTest,
|
|
::testing::ValuesIn(std::vector<OverflowParseCase<double>>({
|
|
{"0", true, 0.0},
|
|
{"0.0", true, 0.0},
|
|
{"1.0", true, 1.0},
|
|
{"1e38", true, 1e38},
|
|
{"-1e38", true, -1e38},
|
|
{"1e40", true, 1e40},
|
|
{"-1e40", true, -1e40},
|
|
{"1e400", false, DBL_MAX},
|
|
{"-1e400", false, -DBL_MAX},
|
|
})), );
|
|
|
|
using FloatProxyParseOverflowFloat16Test =
|
|
::testing::TestWithParam<OverflowParseCase<uint16_t>>;
|
|
|
|
TEST_P(FloatProxyParseOverflowFloat16Test, Sample) {
|
|
std::istringstream input(GetParam().input);
|
|
HexFloat<FloatProxy<Float16>> value(0);
|
|
input >> value;
|
|
EXPECT_NE(GetParam().expect_success, input.fail())
|
|
<< " literal: " << GetParam().input;
|
|
if (GetParam().expect_success) {
|
|
EXPECT_THAT(value.value().data(), Eq(GetParam().expected_value))
|
|
<< " literal: " << GetParam().input;
|
|
}
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
Float16Overflow, FloatProxyParseOverflowFloat16Test,
|
|
::testing::ValuesIn(std::vector<OverflowParseCase<uint16_t>>({
|
|
{"0", true, uint16_t{0}},
|
|
{"0.0", true, uint16_t{0}},
|
|
{"1.0", true, uint16_t{0x3c00}},
|
|
// Overflow for 16-bit float is an error, and returns max or
|
|
// lowest value.
|
|
{"1e38", false, uint16_t{0x7bff}},
|
|
{"1e40", false, uint16_t{0x7bff}},
|
|
{"1e400", false, uint16_t{0x7bff}},
|
|
{"-1e38", false, uint16_t{0xfbff}},
|
|
{"-1e40", false, uint16_t{0xfbff}},
|
|
{"-1e400", false, uint16_t{0xfbff}},
|
|
})), );
|
|
|
|
TEST(FloatProxy, Max) {
|
|
EXPECT_THAT(FloatProxy<Float16>::max().getAsFloat().get_value(),
|
|
Eq(uint16_t{0x7bff}));
|
|
EXPECT_THAT(FloatProxy<float>::max().getAsFloat(),
|
|
Eq(std::numeric_limits<float>::max()));
|
|
EXPECT_THAT(FloatProxy<double>::max().getAsFloat(),
|
|
Eq(std::numeric_limits<double>::max()));
|
|
}
|
|
|
|
TEST(FloatProxy, Lowest) {
|
|
EXPECT_THAT(FloatProxy<Float16>::lowest().getAsFloat().get_value(),
|
|
Eq(uint16_t{0xfbff}));
|
|
EXPECT_THAT(FloatProxy<float>::lowest().getAsFloat(),
|
|
Eq(std::numeric_limits<float>::lowest()));
|
|
EXPECT_THAT(FloatProxy<double>::lowest().getAsFloat(),
|
|
Eq(std::numeric_limits<double>::lowest()));
|
|
}
|
|
|
|
// TODO(awoloszyn): Add fp16 tests and HexFloatTraits.
|
|
} // anonymous namespace
|