1
0
mirror of https://github.com/microsoft/DirectXMath synced 2024-11-28 23:01:05 +00:00

Fixed overread problem with XMUNIBBLE4 and other 16-bit packed types (#128)

This commit is contained in:
Chuck Walbourn 2021-09-10 17:45:22 -07:00 committed by GitHub
parent 792ae2adc0
commit 77faf5af62
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -985,7 +985,8 @@ inline XMVECTOR XM_CALLCONV XMLoadByteN2(const XMBYTEN2* pSource) noexcept
static const XMVECTORF32 Scale = { { { 1.0f / 127.0f, 1.0f / (127.0f * 256.0f), 0, 0 } } }; static const XMVECTORF32 Scale = { { { 1.0f / 127.0f, 1.0f / (127.0f * 256.0f), 0, 0 } } };
static const XMVECTORU32 Mask = { { { 0xFF, 0xFF00, 0, 0 } } }; static const XMVECTORU32 Mask = { { { 0xFF, 0xFF00, 0, 0 } } };
// Splat the color in all four entries (x,z,y,w) // Splat the color in all four entries (x,z,y,w)
XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float*>(&pSource->x)); __m128i vInt = _mm_loadu_si16(&pSource->v);
XMVECTOR vTemp = XM_PERMUTE_PS(_mm_castsi128_ps(vInt), _MM_SHUFFLE(0, 0, 0, 0));
// Mask // Mask
vTemp = _mm_and_ps(vTemp, Mask); vTemp = _mm_and_ps(vTemp, Mask);
// x,y and z are unsigned! Flip the bits to convert the order to signed // x,y and z are unsigned! Flip the bits to convert the order to signed
@ -1024,7 +1025,8 @@ inline XMVECTOR XM_CALLCONV XMLoadByte2(const XMBYTE2* pSource) noexcept
static const XMVECTORF32 Scale = { { { 1.0f, 1.0f / 256.0f, 1.0f / 65536.0f, 1.0f / (65536.0f * 256.0f) } } }; static const XMVECTORF32 Scale = { { { 1.0f, 1.0f / 256.0f, 1.0f / 65536.0f, 1.0f / (65536.0f * 256.0f) } } };
static const XMVECTORU32 Mask = { { { 0xFF, 0xFF00, 0, 0 } } }; static const XMVECTORU32 Mask = { { { 0xFF, 0xFF00, 0, 0 } } };
// Splat the color in all four entries (x,z,y,w) // Splat the color in all four entries (x,z,y,w)
XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float*>(&pSource->x)); __m128i vInt = _mm_loadu_si16(&pSource->v);
XMVECTOR vTemp = XM_PERMUTE_PS(_mm_castsi128_ps(vInt), _MM_SHUFFLE(0, 0, 0, 0));
// Mask // Mask
vTemp = _mm_and_ps(vTemp, Mask); vTemp = _mm_and_ps(vTemp, Mask);
// x,y and z are unsigned! Flip the bits to convert the order to signed // x,y and z are unsigned! Flip the bits to convert the order to signed
@ -1062,7 +1064,8 @@ inline XMVECTOR XM_CALLCONV XMLoadUByteN2(const XMUBYTEN2* pSource) noexcept
static const XMVECTORF32 Scale = { { { 1.0f / 255.0f, 1.0f / (255.0f * 256.0f), 0, 0 } } }; static const XMVECTORF32 Scale = { { { 1.0f / 255.0f, 1.0f / (255.0f * 256.0f), 0, 0 } } };
static const XMVECTORU32 Mask = { { { 0xFF, 0xFF00, 0, 0 } } }; static const XMVECTORU32 Mask = { { { 0xFF, 0xFF00, 0, 0 } } };
// Splat the color in all four entries (x,z,y,w) // Splat the color in all four entries (x,z,y,w)
XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float*>(&pSource->x)); __m128i vInt = _mm_loadu_si16(&pSource->v);
XMVECTOR vTemp = XM_PERMUTE_PS(_mm_castsi128_ps(vInt), _MM_SHUFFLE(0, 0, 0, 0));
// Mask // Mask
vTemp = _mm_and_ps(vTemp, Mask); vTemp = _mm_and_ps(vTemp, Mask);
// w is signed! Flip the bits to convert the order to unsigned // w is signed! Flip the bits to convert the order to unsigned
@ -1099,7 +1102,8 @@ inline XMVECTOR XM_CALLCONV XMLoadUByte2(const XMUBYTE2* pSource) noexcept
static const XMVECTORF32 Scale = { { { 1.0f, 1.0f / 256.0f, 0, 0 } } }; static const XMVECTORF32 Scale = { { { 1.0f, 1.0f / 256.0f, 0, 0 } } };
static const XMVECTORU32 Mask = { { { 0xFF, 0xFF00, 0, 0 } } }; static const XMVECTORU32 Mask = { { { 0xFF, 0xFF00, 0, 0 } } };
// Splat the color in all four entries (x,z,y,w) // Splat the color in all four entries (x,z,y,w)
XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float*>(&pSource->x)); __m128i vInt = _mm_loadu_si16(&pSource->v);
XMVECTOR vTemp = XM_PERMUTE_PS(_mm_castsi128_ps(vInt), _MM_SHUFFLE(0, 0, 0, 0));
// Mask // Mask
vTemp = _mm_and_ps(vTemp, Mask); vTemp = _mm_and_ps(vTemp, Mask);
// w is signed! Flip the bits to convert the order to unsigned // w is signed! Flip the bits to convert the order to unsigned
@ -1137,8 +1141,9 @@ inline XMVECTOR XM_CALLCONV XMLoadU565(const XMU565* pSource) noexcept
#elif defined(_XM_SSE_INTRINSICS_) #elif defined(_XM_SSE_INTRINSICS_)
static const XMVECTORI32 U565And = { { { 0x1F, 0x3F << 5, 0x1F << 11, 0 } } }; static const XMVECTORI32 U565And = { { { 0x1F, 0x3F << 5, 0x1F << 11, 0 } } };
static const XMVECTORF32 U565Mul = { { { 1.0f, 1.0f / 32.0f, 1.0f / 2048.f, 0 } } }; static const XMVECTORF32 U565Mul = { { { 1.0f, 1.0f / 32.0f, 1.0f / 2048.f, 0 } } };
// Get the 32 bit value and splat it // Get the 16 bit value and splat it
XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float*>(&pSource->v)); __m128i vInt = _mm_loadu_si16(&pSource->v);
XMVECTOR vResult = XM_PERMUTE_PS(_mm_castsi128_ps(vInt), _MM_SHUFFLE(0, 0, 0, 0));
// Mask off x, y and z // Mask off x, y and z
vResult = _mm_and_ps(vResult, U565And); vResult = _mm_and_ps(vResult, U565And);
// Convert to float // Convert to float
@ -1971,8 +1976,9 @@ inline XMVECTOR XM_CALLCONV XMLoadUNibble4(const XMUNIBBLE4* pSource) noexcept
#elif defined(_XM_SSE_INTRINSICS_) #elif defined(_XM_SSE_INTRINSICS_)
static const XMVECTORI32 UNibble4And = { { { 0xF, 0xF0, 0xF00, 0xF000 } } }; static const XMVECTORI32 UNibble4And = { { { 0xF, 0xF0, 0xF00, 0xF000 } } };
static const XMVECTORF32 UNibble4Mul = { { { 1.0f, 1.0f / 16.f, 1.0f / 256.f, 1.0f / 4096.f } } }; static const XMVECTORF32 UNibble4Mul = { { { 1.0f, 1.0f / 16.f, 1.0f / 256.f, 1.0f / 4096.f } } };
// Get the 32 bit value and splat it // Get the 16 bit value and splat it
XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float*>(&pSource->v)); __m128i vInt = _mm_loadu_si16(&pSource->v);
XMVECTOR vResult = XM_PERMUTE_PS(_mm_castsi128_ps(vInt), _MM_SHUFFLE(0,0,0,0));
// Mask off x, y, z and w // Mask off x, y, z and w
vResult = _mm_and_ps(vResult, UNibble4And); vResult = _mm_and_ps(vResult, UNibble4And);
// Convert to float // Convert to float
@ -2007,8 +2013,9 @@ inline XMVECTOR XM_CALLCONV XMLoadU555(const XMU555* pSource) noexcept
#elif defined(_XM_SSE_INTRINSICS_) #elif defined(_XM_SSE_INTRINSICS_)
static const XMVECTORI32 U555And = { { { 0x1F, 0x1F << 5, 0x1F << 10, 0x8000 } } }; static const XMVECTORI32 U555And = { { { 0x1F, 0x1F << 5, 0x1F << 10, 0x8000 } } };
static const XMVECTORF32 U555Mul = { { { 1.0f, 1.0f / 32.f, 1.0f / 1024.f, 1.0f / 32768.f } } }; static const XMVECTORF32 U555Mul = { { { 1.0f, 1.0f / 32.f, 1.0f / 1024.f, 1.0f / 32768.f } } };
// Get the 32 bit value and splat it // Get the 16 bit value and splat it
XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float*>(&pSource->v)); __m128i vInt = _mm_loadu_si16(&pSource->v);
XMVECTOR vResult = XM_PERMUTE_PS(_mm_castsi128_ps(vInt), _MM_SHUFFLE(0, 0, 0, 0));
// Mask off x, y, z and w // Mask off x, y, z and w
vResult = _mm_and_ps(vResult, U555And); vResult = _mm_and_ps(vResult, U555And);
// Convert to float // Convert to float