mirror of
https://github.com/microsoft/DirectXTex
synced 2024-11-21 12:00:06 +00:00
Update GPU encoder to use DirectCompute 5 (#108)
This commit is contained in:
parent
8c47482183
commit
574745f290
@ -20,13 +20,27 @@ using Microsoft::WRL::ComPtr;
|
||||
|
||||
namespace
|
||||
{
|
||||
#include "BC7Encode_EncodeBlockCS.inc"
|
||||
#include "BC7Encode_TryMode02CS.inc"
|
||||
#include "BC7Encode_TryMode137CS.inc"
|
||||
#include "BC7Encode_TryMode456CS.inc"
|
||||
#include "BC6HEncode_EncodeBlockCS.inc"
|
||||
#include "BC6HEncode_TryModeG10CS.inc"
|
||||
#include "BC6HEncode_TryModeLE10CS.inc"
|
||||
// Precompiled compute-shader bytecode for the BC6H/BC7 GPU encoder passes,
// pulled in from the unsuffixed "<shader>_<entry>.inc" headers (the files the
// CompileShader build step emits for the cs_5_0 target).
// GPUCompressBC::Initialize picks the arrays in this namespace when the device
// feature level is D3D_FEATURE_LEVEL_11_0 or higher.
// The cs4 headers declare arrays with the same identifiers, so each set is
// wrapped in its own namespace to keep the names from colliding.
namespace cs5
|
||||
{
|
||||
#include "BC7Encode_EncodeBlockCS.inc"
|
||||
#include "BC7Encode_TryMode02CS.inc"
|
||||
#include "BC7Encode_TryMode137CS.inc"
|
||||
#include "BC7Encode_TryMode456CS.inc"
|
||||
#include "BC6HEncode_EncodeBlockCS.inc"
|
||||
#include "BC6HEncode_TryModeG10CS.inc"
|
||||
#include "BC6HEncode_TryModeLE10CS.inc"
|
||||
}
|
||||
|
||||
// Fallback compute-shader bytecode for the same seven encoder passes, pulled
// in from the "<shader>_<entry>_cs40.inc" headers (the files the CompileShader
// build step emits for the cs_4_0 target with EMULATE_F16C defined).
// GPUCompressBC::Initialize picks the arrays in this namespace when the device
// feature level is below D3D_FEATURE_LEVEL_11_0.
// The array identifiers match those in cs5 (both fxc invocations pass the same
// /Vn name), hence the separate namespace.
namespace cs4
|
||||
{
|
||||
#include "BC7Encode_EncodeBlockCS_cs40.inc"
|
||||
#include "BC7Encode_TryMode02CS_cs40.inc"
|
||||
#include "BC7Encode_TryMode137CS_cs40.inc"
|
||||
#include "BC7Encode_TryMode456CS_cs40.inc"
|
||||
#include "BC6HEncode_EncodeBlockCS_cs40.inc"
|
||||
#include "BC6HEncode_TryModeG10CS_cs40.inc"
|
||||
#include "BC6HEncode_TryModeLE10CS_cs40.inc"
|
||||
}
|
||||
|
||||
struct BufferBC6HBC7
|
||||
{
|
||||
@ -132,39 +146,53 @@ HRESULT GPUCompressBC::Initialize(ID3D11Device* pDevice)
|
||||
//--- Create compute shader library: BC6H -----------------------------------------
|
||||
|
||||
// Modes 11-14
|
||||
HRESULT hr = pDevice->CreateComputeShader(BC6HEncode_TryModeG10CS, sizeof(BC6HEncode_TryModeG10CS), nullptr, m_BC6H_tryModeG10CS.ReleaseAndGetAddressOf());
|
||||
auto blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC6HEncode_TryModeG10CS : cs4::BC6HEncode_TryModeG10CS;
|
||||
auto blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC6HEncode_TryModeG10CS) : sizeof(cs4::BC6HEncode_TryModeG10CS);
|
||||
HRESULT hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC6H_tryModeG10CS.ReleaseAndGetAddressOf());
|
||||
if (FAILED(hr))
|
||||
return hr;
|
||||
|
||||
// Modes 1-10
|
||||
hr = pDevice->CreateComputeShader(BC6HEncode_TryModeLE10CS, sizeof(BC6HEncode_TryModeLE10CS), nullptr, m_BC6H_tryModeLE10CS.ReleaseAndGetAddressOf());
|
||||
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC6HEncode_TryModeLE10CS : cs4::BC6HEncode_TryModeLE10CS;
|
||||
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC6HEncode_TryModeLE10CS) : sizeof(cs4::BC6HEncode_TryModeLE10CS);
|
||||
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC6H_tryModeLE10CS.ReleaseAndGetAddressOf());
|
||||
if (FAILED(hr))
|
||||
return hr;
|
||||
|
||||
// Encode
|
||||
hr = pDevice->CreateComputeShader(BC6HEncode_EncodeBlockCS, sizeof(BC6HEncode_EncodeBlockCS), nullptr, m_BC6H_encodeBlockCS.ReleaseAndGetAddressOf());
|
||||
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC6HEncode_EncodeBlockCS : cs4::BC6HEncode_EncodeBlockCS;
|
||||
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC6HEncode_EncodeBlockCS) : sizeof(cs4::BC6HEncode_EncodeBlockCS);
|
||||
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC6H_encodeBlockCS.ReleaseAndGetAddressOf());
|
||||
if (FAILED(hr))
|
||||
return hr;
|
||||
|
||||
//--- Create compute shader library: BC7 ------------------------------------------
|
||||
|
||||
// Modes 4, 5, 6
|
||||
hr = pDevice->CreateComputeShader(BC7Encode_TryMode456CS, sizeof(BC7Encode_TryMode456CS), nullptr, m_BC7_tryMode456CS.ReleaseAndGetAddressOf());
|
||||
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC7Encode_TryMode456CS : cs4::BC7Encode_TryMode456CS;
|
||||
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC7Encode_TryMode456CS) : sizeof(cs4::BC7Encode_TryMode456CS);
|
||||
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC7_tryMode456CS.ReleaseAndGetAddressOf());
|
||||
if (FAILED(hr))
|
||||
return hr;
|
||||
|
||||
// Modes 1, 3, 7
|
||||
hr = pDevice->CreateComputeShader(BC7Encode_TryMode137CS, sizeof(BC7Encode_TryMode137CS), nullptr, m_BC7_tryMode137CS.ReleaseAndGetAddressOf());
|
||||
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC7Encode_TryMode137CS : cs4::BC7Encode_TryMode137CS;
|
||||
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC7Encode_TryMode137CS) : sizeof(cs4::BC7Encode_TryMode137CS);
|
||||
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC7_tryMode137CS.ReleaseAndGetAddressOf());
|
||||
if (FAILED(hr))
|
||||
return hr;
|
||||
|
||||
// Modes 0, 2
|
||||
hr = pDevice->CreateComputeShader(BC7Encode_TryMode02CS, sizeof(BC7Encode_TryMode02CS), nullptr, m_BC7_tryMode02CS.ReleaseAndGetAddressOf());
|
||||
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC7Encode_TryMode02CS : cs4::BC7Encode_TryMode02CS;
|
||||
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC7Encode_TryMode02CS) : sizeof(cs4::BC7Encode_TryMode02CS);
|
||||
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC7_tryMode02CS.ReleaseAndGetAddressOf());
|
||||
if (FAILED(hr))
|
||||
return hr;
|
||||
|
||||
// Encode
|
||||
hr = pDevice->CreateComputeShader(BC7Encode_EncodeBlockCS, sizeof(BC7Encode_EncodeBlockCS), nullptr, m_BC7_encodeBlockCS.ReleaseAndGetAddressOf());
|
||||
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC7Encode_EncodeBlockCS : cs4::BC7Encode_EncodeBlockCS;
|
||||
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC7Encode_EncodeBlockCS) : sizeof(cs4::BC7Encode_EncodeBlockCS);
|
||||
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC7_encodeBlockCS.ReleaseAndGetAddressOf());
|
||||
if (FAILED(hr))
|
||||
return hr;
|
||||
|
||||
|
@ -982,6 +982,7 @@ void EncodeBlockCS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef EMULATE_F16C
|
||||
uint float2half1(float f)
|
||||
{
|
||||
uint Result;
|
||||
@ -1014,23 +1015,15 @@ uint float2half1(float f)
|
||||
}
|
||||
return (Result|Sign);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Converts each component of endPoint_f from 32-bit float to a half-precision
// bit pattern, returned one per component of a uint3.
//   endPoint_f : the three float values to convert.
//   returns    : uint3 holding the converted x, y and z values.
// When EMULATE_F16C is defined the conversion goes through the software
// routine float2half1; otherwise the f32tof16 intrinsic is used.
uint3 float2half(float3 endPoint_f)
|
||||
{
|
||||
// Disabled bit-manipulation variant of the conversion, kept only as comments:
//uint3 sign = asuint(endPoint_f) & 0x80000000;
|
||||
//uint3 expo = asuint(endPoint_f) & 0x7F800000;
|
||||
//uint3 base = asuint(endPoint_f) & 0x007FFFFF;
|
||||
//return ( expo < 0x33800000 ) ? 0
|
||||
// //0x33800000 indicating 2^-24, which is minimal denormalized number that half can present
|
||||
// : ( ( expo < 0x38800000 ) ? ( sign >> 16 ) | ( ( base + 0x00800000 ) >> ( 23 - ( ( expo - 0x33800000 ) >> 23 ) ) )//fixed a bug in v0.2
|
||||
// //0x38800000 indicating 2^-14, which is minimal normalized number that half can present, so need to use denormalized half presentation
|
||||
// : ( ( expo == 0x7F800000 || expo > 0x47000000 ) ? ( ( sign >> 16 ) | 0x7bff )
|
||||
// // treat NaN as INF, treat INF (including NaN) as the maximum/minimum number that half can present
|
||||
// // 0x47000000 indicating 2^15, which is maximum exponent that half can present, so cut to 0x7bff which is the maximum half number
|
||||
// : ( ( sign >> 16 ) | ( ( ( expo - 0x38000000 ) | base ) >> 13 ) ) ) );
|
||||
|
||||
|
||||
// Software path: per-component conversion through float2half1.
#ifdef EMULATE_F16C
|
||||
return uint3(float2half1(endPoint_f.x), float2half1(endPoint_f.y), float2half1(endPoint_f.z));
|
||||
// Intrinsic path: per-component conversion through f32tof16.
#else
|
||||
return uint3(f32tof16(endPoint_f.x), f32tof16(endPoint_f.y), f32tof16(endPoint_f.z));
|
||||
#endif
|
||||
}
|
||||
int3 start_quantize(uint3 pixel_h)
|
||||
{
|
||||
@ -1207,6 +1200,7 @@ void generate_palette_unquantized16(out uint3 palette, int3 low, int3 high, int
|
||||
palette = finish_unquantize(tmp);
|
||||
}
|
||||
|
||||
#ifdef EMULATE_F16C
|
||||
float half2float1(uint Value)
|
||||
{
|
||||
uint Mantissa = (uint)(Value & 0x03FF);
|
||||
@ -1240,16 +1234,15 @@ float half2float1(uint Value)
|
||||
|
||||
return asfloat(Result);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Converts each component of color_h from a half-precision bit pattern to a
// 32-bit float.
//   color_h : the three half values to convert, one per uint component.
//   returns : float3 holding the converted x, y and z values.
// When EMULATE_F16C is defined the conversion goes through the software
// routine half2float1; otherwise the f16tof32 intrinsic is used.
float3 half2float(uint3 color_h)
|
||||
{
|
||||
// Disabled bit-manipulation variant of the conversion, kept only as comments:
//uint3 sign = color_h & 0x8000;
|
||||
//uint3 expo = color_h & 0x7C00;
|
||||
//uint3 base = color_h & 0x03FF;
|
||||
//return ( expo == 0 ) ? asfloat( ( sign << 16 ) | asuint( float3(base) / 16777216 ) ) //16777216 = 2^24
|
||||
// : asfloat( ( sign << 16 ) | ( ( ( expo + 0x1C000 ) | base ) << 13 ) ); //0x1C000 = 0x1FC00 - 0x3C00
|
||||
|
||||
// Software path: per-component conversion through half2float1.
#ifdef EMULATE_F16C
|
||||
return float3(half2float1(color_h.x), half2float1(color_h.y), half2float1(color_h.z));
|
||||
// Intrinsic path: per-component conversion through f16tof32.
#else
|
||||
return float3(f16tof32(color_h.x), f16tof32(color_h.y), f16tof32(color_h.z));
|
||||
#endif
|
||||
}
|
||||
|
||||
void block_package(inout uint4 block, int2x3 endPoint[2], uint mode_type, uint partition_index) // for mode 1 - 10
|
||||
|
@ -46,8 +46,11 @@ endlocal
|
||||
exit /b 0
|
||||
|
||||
rem CompileShader <hlsl-base-name> <entry-point>
rem Builds and runs two fxc command lines for "%1.hlsl" with entry point %2:
rem   fxc  - cs_5_0 target, writing %1_%2.inc and %1_%2.pdb
rem   fxc4 - cs_4_0 target with EMULATE_F16C defined, writing %1_%2_cs40.inc
rem          and %1_%2_cs40.pdb
rem Both pass the same /Vn name (%1_%2) for the generated array.
rem Each command line is echoed before it runs; if either command fails,
rem "error" is set to 1.
rem NOTE(review): there are two consecutive "set fxc=" lines; the second
rem (cs_5_0) overwrites the first (cs_4_0). The first looks like the
rem pre-change line of a diff - confirm before relying on it.
:CompileShader
|
||||
set fxc=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_4_0 /E%2 "/Fh%CompileShadersOutput%\%1_%2.inc" "/Fd%CompileShadersOutput%\%1_%2.pdb" /Vn%1_%2
|
||||
set fxc=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_5_0 /E%2 "/Fh%CompileShadersOutput%\%1_%2.inc" "/Fd%CompileShadersOutput%\%1_%2.pdb" /Vn%1_%2
|
||||
set fxc4=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_4_0 /DEMULATE_F16C /E%2 "/Fh%CompileShadersOutput%\%1_%2_cs40.inc" "/Fd%CompileShadersOutput%\%1_%2_cs40.pdb" /Vn%1_%2
|
||||
echo.
|
||||
echo %fxc%
|
||||
%fxc% || set error=1
|
||||
echo %fxc4%
|
||||
%fxc4% || set error=1
|
||||
exit /b
|
||||
|
@ -1117,7 +1117,8 @@ namespace
|
||||
hr = pAdapter->GetDesc(&desc);
|
||||
if (SUCCEEDED(hr))
|
||||
{
|
||||
wprintf(L"\n[Using DirectCompute on \"%ls\"]\n", desc.Description);
|
||||
wprintf(L"\n[Using DirectCompute %ls on \"%ls\"]\n",
|
||||
(fl >= D3D_FEATURE_LEVEL_11_0) ? L"5.0" : L"4.0", desc.Description);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user