1
0
mirror of https://github.com/microsoft/DirectXTex synced 2024-09-19 15:19:56 +00:00

Update GPU encoder to use DirectCompute 5 (#108)

This commit is contained in:
Chuck Walbourn 2023-03-20 10:49:19 -07:00 committed by GitHub
parent 8c47482183
commit 574745f290
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 60 additions and 35 deletions

View File

@ -20,13 +20,27 @@ using Microsoft::WRL::ComPtr;
namespace
{
#include "BC7Encode_EncodeBlockCS.inc"
#include "BC7Encode_TryMode02CS.inc"
#include "BC7Encode_TryMode137CS.inc"
#include "BC7Encode_TryMode456CS.inc"
#include "BC6HEncode_EncodeBlockCS.inc"
#include "BC6HEncode_TryModeG10CS.inc"
#include "BC6HEncode_TryModeLE10CS.inc"
// Generated shader headers without the "_cs40" suffix. GPUCompressBC::Initialize
// selects the cs5:: arrays when the device feature level is at least
// D3D_FEATURE_LEVEL_11_0. The includes are wrapped in a namespace so the
// identically named arrays from the "_cs40" headers can coexist in this file.
namespace cs5
{
#include "BC7Encode_EncodeBlockCS.inc"
#include "BC7Encode_TryMode02CS.inc"
#include "BC7Encode_TryMode137CS.inc"
#include "BC7Encode_TryMode456CS.inc"
#include "BC6HEncode_EncodeBlockCS.inc"
#include "BC6HEncode_TryModeG10CS.inc"
#include "BC6HEncode_TryModeLE10CS.inc"
}
// Generated shader headers with the "_cs40" suffix. GPUCompressBC::Initialize
// falls back to the cs4:: arrays when the device feature level is below
// D3D_FEATURE_LEVEL_11_0. The namespace keeps these arrays distinct from the
// same-named ones declared by the non-"_cs40" headers.
namespace cs4
{
#include "BC7Encode_EncodeBlockCS_cs40.inc"
#include "BC7Encode_TryMode02CS_cs40.inc"
#include "BC7Encode_TryMode137CS_cs40.inc"
#include "BC7Encode_TryMode456CS_cs40.inc"
#include "BC6HEncode_EncodeBlockCS_cs40.inc"
#include "BC6HEncode_TryModeG10CS_cs40.inc"
#include "BC6HEncode_TryModeLE10CS_cs40.inc"
}
struct BufferBC6HBC7
{
@ -132,39 +146,53 @@ HRESULT GPUCompressBC::Initialize(ID3D11Device* pDevice)
//--- Create compute shader library: BC6H -----------------------------------------
// Modes 11-14
HRESULT hr = pDevice->CreateComputeShader(BC6HEncode_TryModeG10CS, sizeof(BC6HEncode_TryModeG10CS), nullptr, m_BC6H_tryModeG10CS.ReleaseAndGetAddressOf());
auto blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC6HEncode_TryModeG10CS : cs4::BC6HEncode_TryModeG10CS;
auto blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC6HEncode_TryModeG10CS) : sizeof(cs4::BC6HEncode_TryModeG10CS);
HRESULT hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC6H_tryModeG10CS.ReleaseAndGetAddressOf());
if (FAILED(hr))
return hr;
// Modes 1-10
hr = pDevice->CreateComputeShader(BC6HEncode_TryModeLE10CS, sizeof(BC6HEncode_TryModeLE10CS), nullptr, m_BC6H_tryModeLE10CS.ReleaseAndGetAddressOf());
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC6HEncode_TryModeLE10CS : cs4::BC6HEncode_TryModeLE10CS;
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC6HEncode_TryModeLE10CS) : sizeof(cs4::BC6HEncode_TryModeLE10CS);
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC6H_tryModeLE10CS.ReleaseAndGetAddressOf());
if (FAILED(hr))
return hr;
// Encode
hr = pDevice->CreateComputeShader(BC6HEncode_EncodeBlockCS, sizeof(BC6HEncode_EncodeBlockCS), nullptr, m_BC6H_encodeBlockCS.ReleaseAndGetAddressOf());
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC6HEncode_EncodeBlockCS : cs4::BC6HEncode_EncodeBlockCS;
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC6HEncode_EncodeBlockCS) : sizeof(cs4::BC6HEncode_EncodeBlockCS);
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC6H_encodeBlockCS.ReleaseAndGetAddressOf());
if (FAILED(hr))
return hr;
//--- Create compute shader library: BC7 ------------------------------------------
// Modes 4, 5, 6
hr = pDevice->CreateComputeShader(BC7Encode_TryMode456CS, sizeof(BC7Encode_TryMode456CS), nullptr, m_BC7_tryMode456CS.ReleaseAndGetAddressOf());
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC7Encode_TryMode456CS : cs4::BC7Encode_TryMode456CS;
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC7Encode_TryMode456CS) : sizeof(cs4::BC7Encode_TryMode456CS);
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC7_tryMode456CS.ReleaseAndGetAddressOf());
if (FAILED(hr))
return hr;
// Modes 1, 3, 7
hr = pDevice->CreateComputeShader(BC7Encode_TryMode137CS, sizeof(BC7Encode_TryMode137CS), nullptr, m_BC7_tryMode137CS.ReleaseAndGetAddressOf());
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC7Encode_TryMode137CS : cs4::BC7Encode_TryMode137CS;
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC7Encode_TryMode137CS) : sizeof(cs4::BC7Encode_TryMode137CS);
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC7_tryMode137CS.ReleaseAndGetAddressOf());
if (FAILED(hr))
return hr;
// Modes 0, 2
hr = pDevice->CreateComputeShader(BC7Encode_TryMode02CS, sizeof(BC7Encode_TryMode02CS), nullptr, m_BC7_tryMode02CS.ReleaseAndGetAddressOf());
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC7Encode_TryMode02CS : cs4::BC7Encode_TryMode02CS;
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC7Encode_TryMode02CS) : sizeof(cs4::BC7Encode_TryMode02CS);
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC7_tryMode02CS.ReleaseAndGetAddressOf());
if (FAILED(hr))
return hr;
// Encode
hr = pDevice->CreateComputeShader(BC7Encode_EncodeBlockCS, sizeof(BC7Encode_EncodeBlockCS), nullptr, m_BC7_encodeBlockCS.ReleaseAndGetAddressOf());
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC7Encode_EncodeBlockCS : cs4::BC7Encode_EncodeBlockCS;
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC7Encode_EncodeBlockCS) : sizeof(cs4::BC7Encode_EncodeBlockCS);
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC7_encodeBlockCS.ReleaseAndGetAddressOf());
if (FAILED(hr))
return hr;

View File

@ -982,6 +982,7 @@ void EncodeBlockCS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
}
}
#ifdef EMULATE_F16C
uint float2half1(float f)
{
uint Result;
@ -1014,23 +1015,15 @@ uint float2half1(float f)
}
return (Result|Sign);
}
#endif
// Converts the three components of endPoint_f from 32-bit float to half-precision
// bit patterns, one per component of the returned uint3.
// With EMULATE_F16C defined the conversion goes through the software routine
// float2half1; otherwise the f32tof16 intrinsic is used.
uint3 float2half(float3 endPoint_f)
{
// Earlier vectorized bit-manipulation implementation, left disabled:
//uint3 sign = asuint(endPoint_f) & 0x80000000;
//uint3 expo = asuint(endPoint_f) & 0x7F800000;
//uint3 base = asuint(endPoint_f) & 0x007FFFFF;
//return ( expo < 0x33800000 ) ? 0
// //0x33800000 indicating 2^-24, which is minimal denormalized number that half can present
// : ( ( expo < 0x38800000 ) ? ( sign >> 16 ) | ( ( base + 0x00800000 ) >> ( 23 - ( ( expo - 0x33800000 ) >> 23 ) ) )//fixed a bug in v0.2
// //0x38800000 indicating 2^-14, which is minimal normalized number that half can present, so need to use denormalized half presentation
// : ( ( expo == 0x7F800000 || expo > 0x47000000 ) ? ( ( sign >> 16 ) | 0x7bff )
// // treat NaN as INF, treat INF (including NaN) as the maximum/minimum number that half can present
// // 0x47000000 indicating 2^15, which is maximum exponent that half can present, so cut to 0x7bff which is the maximum half number
// : ( ( sign >> 16 ) | ( ( ( expo - 0x38000000 ) | base ) >> 13 ) ) ) );
#ifdef EMULATE_F16C
// Software path: convert each component with float2half1.
return uint3(float2half1(endPoint_f.x), float2half1(endPoint_f.y), float2half1(endPoint_f.z));
#else
// Intrinsic path: convert each component with f32tof16.
return uint3(f32tof16(endPoint_f.x), f32tof16(endPoint_f.y), f32tof16(endPoint_f.z));
#endif
}
int3 start_quantize(uint3 pixel_h)
{
@ -1207,6 +1200,7 @@ void generate_palette_unquantized16(out uint3 palette, int3 low, int3 high, int
palette = finish_unquantize(tmp);
}
#ifdef EMULATE_F16C
float half2float1(uint Value)
{
uint Mantissa = (uint)(Value & 0x03FF);
@ -1240,16 +1234,15 @@ float half2float1(uint Value)
return asfloat(Result);
}
#endif
// Converts the three half-precision bit patterns in color_h to 32-bit floats,
// one per component of the returned float3.
// With EMULATE_F16C defined the conversion goes through the software routine
// half2float1; otherwise the f16tof32 intrinsic is used.
float3 half2float(uint3 color_h)
{
// Earlier vectorized bit-manipulation implementation, left disabled:
//uint3 sign = color_h & 0x8000;
//uint3 expo = color_h & 0x7C00;
//uint3 base = color_h & 0x03FF;
//return ( expo == 0 ) ? asfloat( ( sign << 16 ) | asuint( float3(base) / 16777216 ) ) //16777216 = 2^24
// : asfloat( ( sign << 16 ) | ( ( ( expo + 0x1C000 ) | base ) << 13 ) ); //0x1C000 = 0x1FC00 - 0x3C00
#ifdef EMULATE_F16C
// Software path: convert each component with half2float1.
return float3(half2float1(color_h.x), half2float1(color_h.y), half2float1(color_h.z));
#else
// Intrinsic path: convert each component with f16tof32.
return float3(f16tof32(color_h.x), f16tof32(color_h.y), f16tof32(color_h.z));
#endif
}
void block_package(inout uint4 block, int2x3 endPoint[2], uint mode_type, uint partition_index) // for mode 1 - 10

View File

@ -46,8 +46,11 @@ endlocal
exit /b 0
:CompileShader
rem Compiles one compute shader entry point with fxc, once per shader profile.
rem Argument 1 is the HLSL file name without its extension.
rem Argument 2 is the entry point name.
rem The cs_5_0 build writes name_entry.inc and name_entry.pdb.
rem The cs_4_0 build defines EMULATE_F16C and writes name_entry_cs40.inc and name_entry_cs40.pdb.
rem Both builds pass the same /Vn array name, name_entry.
rem A failing fxc invocation sets the error variable to 1.
rem NOTE(review): the first assignment to fxc below is overwritten by the next line before use.
set fxc=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_4_0 /E%2 "/Fh%CompileShadersOutput%\%1_%2.inc" "/Fd%CompileShadersOutput%\%1_%2.pdb" /Vn%1_%2
set fxc=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_5_0 /E%2 "/Fh%CompileShadersOutput%\%1_%2.inc" "/Fd%CompileShadersOutput%\%1_%2.pdb" /Vn%1_%2
set fxc4=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_4_0 /DEMULATE_F16C /E%2 "/Fh%CompileShadersOutput%\%1_%2_cs40.inc" "/Fd%CompileShadersOutput%\%1_%2_cs40.pdb" /Vn%1_%2
rem Echo each command line before running it so the build log shows what was executed.
echo.
echo %fxc%
%fxc% || set error=1
echo %fxc4%
%fxc4% || set error=1
exit /b

View File

@ -1117,7 +1117,8 @@ namespace
hr = pAdapter->GetDesc(&desc);
if (SUCCEEDED(hr))
{
wprintf(L"\n[Using DirectCompute on \"%ls\"]\n", desc.Description);
wprintf(L"\n[Using DirectCompute %ls on \"%ls\"]\n",
(fl >= D3D_FEATURE_LEVEL_11_0) ? L"5.0" : L"4.0", desc.Description);
}
}
}