1
0
mirror of https://github.com/microsoft/DirectXTex synced 2024-12-26 19:31:02 +00:00

DirectXTex: BC6H / BC7 DirectCompute codec integration

This commit is contained in:
walbourn_cp 2013-08-05 10:58:54 -07:00
parent 851b8d4be1
commit 8eec65caf4
28 changed files with 57379 additions and 33 deletions

View File

@ -0,0 +1,599 @@
//-------------------------------------------------------------------------------------
// BCDirectCompute.cpp
//
// Direct3D 11 Compute Shader BC Compressor
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//-------------------------------------------------------------------------------------
#include "directxtexp.h"
#include "BCDirectCompute.h"
#if defined(_DEBUG) || defined(PROFILE)
#pragma comment(lib,"dxguid.lib")
#endif
namespace
{
#include "Shaders\Compiled\BC7Encode_EncodeBlockCS.inc"
#include "Shaders\Compiled\BC7Encode_TryMode02CS.inc"
#include "Shaders\Compiled\BC7Encode_TryMode137CS.inc"
#include "Shaders\Compiled\BC7Encode_TryMode456CS.inc"
#include "Shaders\Compiled\BC6HEncode_EncodeBlockCS.inc"
#include "Shaders\Compiled\BC6HEncode_TryModeG10CS.inc"
#include "Shaders\Compiled\BC6HEncode_TryModeLE10CS.inc"
// Element type of the structured GPU buffers created in GPUCompressBC::Prepare
// (output, err1 and err2). sizeof(BufferBC6HBC7) is used as the structure stride,
// and Prepare allocates one element per 4x4 block of the image. Compress also uses
// xblocks * sizeof(BufferBC6HBC7) as the row stride when reading the result back.
struct BufferBC6HBC7
{
// Four UINT words of per-block data.
UINT color[4];
};
// CPU-side image of the constant buffer that GPUCompressBC::Compress memcpy's into
// m_constBuffer before dispatching a compute shader. The field order and total size
// are part of the contract with the shaders, so do not reorder or resize.
struct ConstantsBC6HBC7
{
// Width of the source image (srcImage.width).
UINT tex_width;
// Number of 4x4 blocks per row of the image.
UINT num_block_x;
// Target block-compressed DXGI_FORMAT value (m_bcformat).
UINT format;
// Encoding mode the "try mode" shader pass should evaluate.
UINT mode_id;
// Index of the first block handled by the current batch.
UINT start_block_id;
// Total number of blocks in the image.
UINT num_total_blocks;
// Alpha weight value (GPUCompressBC::m_alphaWeight).
float alpha_weight;
// Padding word; keeps the structure at eight 32-bit values (see static_assert below).
UINT reserved;
};
// Guards against accidental layout changes: the buffer is created with ByteWidth = sizeof(ConstantsBC6HBC7).
static_assert( sizeof(ConstantsBC6HBC7) == sizeof(UINT)*8, "Constant buffer size mismatch" );
// Binds a compute shader together with its inputs (SRVs), constants and a single
// output UAV on slot 0, then dispatches X thread groups (X x 1 x 1).
inline void RunComputeShader( ID3D11DeviceContext* pContext,
                              ID3D11ComputeShader* shader,
                              ID3D11ShaderResourceView** pSRVs,
                              UINT srvCount,
                              ID3D11Buffer* pCB,
                              ID3D11UnorderedAccessView* pUAV,
                              UINT X )
{
    // Callers swap the same buffers between SRV and UAV roles from pass to pass,
    // so the UAV slot is cleared before the new SRVs are bound.
    ID3D11UnorderedAccessView* unboundUAV[1] = { nullptr };
    pContext->CSSetUnorderedAccessViews( 0, 1, unboundUAV, nullptr );

    pContext->CSSetShader( shader, nullptr, 0 );
    pContext->CSSetShaderResources( 0, srvCount, pSRVs );

    ID3D11UnorderedAccessView* outputUAV[1] = { pUAV };
    pContext->CSSetUnorderedAccessViews( 0, 1, outputUAV, nullptr );

    ID3D11Buffer* constants[1] = { pCB };
    pContext->CSSetConstantBuffers( 0, 1, constants );

    pContext->Dispatch( X, 1, 1 );
}
// Unbinds everything RunComputeShader may have left on the compute stage:
// UAV slot 0, SRV slots 0-2 and constant buffer slot 0.
inline void ResetContext( ID3D11DeviceContext* pContext )
{
    ID3D11UnorderedAccessView* noUAV[1] = { nullptr };
    pContext->CSSetUnorderedAccessViews( 0, 1, noUAV, nullptr );

    ID3D11ShaderResourceView* noSRVs[3] = { nullptr, nullptr, nullptr };
    pContext->CSSetShaderResources( 0, 3, noSRVs );

    ID3D11Buffer* noConstants[1] = { nullptr };
    pContext->CSSetConstantBuffers( 0, 1, noConstants );
}
};
namespace DirectX
{
// Constructs an idle compressor: no formats selected, zero dimensions and an
// alpha weight of 1. All D3D objects are left empty.
GPUCompressBC::GPUCompressBC()
    : m_bcformat( DXGI_FORMAT_UNKNOWN )
    , m_srcformat( DXGI_FORMAT_UNKNOWN )
    , m_alphaWeight( 1.f )
    , m_width( 0 )
    , m_height( 0 )
{
}
//-------------------------------------------------------------------------------------
// Binds the compressor to a device: verifies compute shader support, keeps a
// reference to the device and its immediate context, and creates the BC6H and
// BC7 compute shaders from the embedded bytecode.
//
// Returns E_INVALIDARG for a null device, HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED)
// when the device cannot run the shaders, or the failing CreateComputeShader result.
_Use_decl_annotations_
HRESULT GPUCompressBC::Initialize( ID3D11Device* pDevice )
{
    if ( pDevice == nullptr )
        return E_INVALIDARG;

    // Check for DirectCompute support
    const D3D_FEATURE_LEVEL featureLevel = pDevice->GetFeatureLevel();

    // Feature Level 9.x hardware has no DirectCompute at all
    if ( featureLevel < D3D_FEATURE_LEVEL_10_0 )
        return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );

    // On Feature Level 10.x, compute shaders are optional and must be queried
    if ( featureLevel < D3D_FEATURE_LEVEL_11_0 )
    {
        D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS options;
        const HRESULT query = pDevice->CheckFeatureSupport( D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, &options, sizeof(options) );
        if ( FAILED(query) )
        {
            // Treat a failed query as "feature not available"
            memset( &options, 0, sizeof(options) );
        }

        if ( !options.ComputeShaders_Plus_RawAndStructuredBuffers_Via_Shader_4_x )
            return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
    }

    // Save a device reference and obtain immediate context
    m_device = pDevice;

    pDevice->GetImmediateContext( m_context.ReleaseAndGetAddressOf() );
    assert( m_context );

    // Each creation only runs if everything before it succeeded.

    //--- BC6H: modes 11-14, modes 1-10, final encode ---------------------------------
    HRESULT hr = pDevice->CreateComputeShader( BC6HEncode_TryModeG10CS, sizeof(BC6HEncode_TryModeG10CS), nullptr, m_BC6H_tryModeG10CS.ReleaseAndGetAddressOf() );

    if ( SUCCEEDED(hr) )
        hr = pDevice->CreateComputeShader( BC6HEncode_TryModeLE10CS, sizeof(BC6HEncode_TryModeLE10CS), nullptr, m_BC6H_tryModeLE10CS.ReleaseAndGetAddressOf() );

    if ( SUCCEEDED(hr) )
        hr = pDevice->CreateComputeShader( BC6HEncode_EncodeBlockCS, sizeof(BC6HEncode_EncodeBlockCS), nullptr, m_BC6H_encodeBlockCS.ReleaseAndGetAddressOf() );

    //--- BC7: modes 4/5/6, modes 1/3/7, modes 0/2, final encode ----------------------
    if ( SUCCEEDED(hr) )
        hr = pDevice->CreateComputeShader( BC7Encode_TryMode456CS, sizeof(BC7Encode_TryMode456CS), nullptr, m_BC7_tryMode456CS.ReleaseAndGetAddressOf() );

    if ( SUCCEEDED(hr) )
        hr = pDevice->CreateComputeShader( BC7Encode_TryMode137CS, sizeof(BC7Encode_TryMode137CS), nullptr, m_BC7_tryMode137CS.ReleaseAndGetAddressOf() );

    if ( SUCCEEDED(hr) )
        hr = pDevice->CreateComputeShader( BC7Encode_TryMode02CS, sizeof(BC7Encode_TryMode02CS), nullptr, m_BC7_tryMode02CS.ReleaseAndGetAddressOf() );

    if ( SUCCEEDED(hr) )
        hr = pDevice->CreateComputeShader( BC7Encode_EncodeBlockCS, sizeof(BC7Encode_EncodeBlockCS), nullptr, m_BC7_encodeBlockCS.ReleaseAndGetAddressOf() );

    return FAILED(hr) ? hr : S_OK;
}
//-------------------------------------------------------------------------------------
// Configures the compressor for images of the given size and target BC format and
// (re)creates every GPU resource sized by the block count: the output and two error
// structured buffers, the CPU-readable staging buffer, the constant buffer, and the
// SRVs/UAVs over them.
//
//   width, height  Image dimensions in pixels; both must be non-zero.
//   format         Target format; BC6H (TYPELESS/UF16/SF16) or BC7 (TYPELESS/UNORM/UNORM_SRGB).
//   alphaWeight    Must not be negative; stored and uploaded to the shaders by Compress.
//
// Returns E_INVALIDARG for bad dimensions/weight, HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED)
// for any other format, HRESULT_FROM_WIN32(ERROR_ARITHMETIC_OVERFLOW) when the block
// buffers would not fit in a 32-bit byte count, E_POINTER when Initialize has not
// provided a device, or the failing D3D11 creation result.
_Use_decl_annotations_
HRESULT GPUCompressBC::Prepare( size_t width, size_t height, DXGI_FORMAT format, float alphaWeight )
{
    if ( !width || !height || alphaWeight < 0.f )
        return E_INVALIDARG;

#ifdef _M_X64
    if ( (width > 0xFFFFFFFF) || (height > 0xFFFFFFFF) )
        return E_INVALIDARG;
#endif

    // Block counts are computed in 64 bits so that neither "width + 3" (32-bit size_t)
    // nor the product can wrap; D3D11 buffer sizes and element counts are UINT.
    const uint64_t xblocks = std::max<uint64_t>( 1, ( static_cast<uint64_t>( width ) + 3 ) >> 2 );
    const uint64_t yblocks = std::max<uint64_t>( 1, ( static_cast<uint64_t>( height ) + 3 ) >> 2 );
    const uint64_t totalBlocks = xblocks * yblocks;

    if ( totalBlocks > ( static_cast<uint64_t>( 0xFFFFFFFF ) / sizeof( BufferBC6HBC7 ) ) )
        return HRESULT_FROM_WIN32( ERROR_ARITHMETIC_OVERFLOW );

    const size_t num_blocks = static_cast<size_t>( totalBlocks );

    m_width = width;
    m_height = height;

    // Remember the caller's weight; Compress writes it into the shader constants.
    m_alphaWeight = alphaWeight;

    switch( format )
    {
    // BC6H GPU compressor takes RGBAF32 as input
    case DXGI_FORMAT_BC6H_TYPELESS:
    case DXGI_FORMAT_BC6H_UF16:
    case DXGI_FORMAT_BC6H_SF16:
        m_srcformat = DXGI_FORMAT_R32G32B32A32_FLOAT;
        break;

    // BC7 GPU compressor takes RGBA32 as input
    case DXGI_FORMAT_BC7_TYPELESS:
    case DXGI_FORMAT_BC7_UNORM:
        m_srcformat = DXGI_FORMAT_R8G8B8A8_UNORM;
        break;

    case DXGI_FORMAT_BC7_UNORM_SRGB:
        m_srcformat = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB;
        break;

    default:
        m_bcformat = m_srcformat = DXGI_FORMAT_UNKNOWN;
        return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
    }

    m_bcformat = format;

    auto pDevice = m_device.Get();
    if ( !pDevice )
        return E_POINTER;

    // Create structured buffers (one BufferBC6HBC7 element per block)
    size_t bufferSize = num_blocks * sizeof( BufferBC6HBC7 );
    {
        D3D11_BUFFER_DESC desc;
        memset( &desc, 0, sizeof(desc) );
        desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
        desc.Usage = D3D11_USAGE_DEFAULT;
        desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
        desc.StructureByteStride = sizeof( BufferBC6HBC7 );
        desc.ByteWidth = static_cast<UINT>( bufferSize );

        HRESULT hr = pDevice->CreateBuffer( &desc, nullptr, m_output.ReleaseAndGetAddressOf() );
        if ( FAILED(hr) )
        {
            return hr;
        }

        hr = pDevice->CreateBuffer( &desc, nullptr, m_err1.ReleaseAndGetAddressOf() );
        if ( FAILED(hr) )
        {
            return hr;
        }

        hr = pDevice->CreateBuffer( &desc, nullptr, m_err2.ReleaseAndGetAddressOf() );
        if ( FAILED(hr) )
        {
            return hr;
        }
    }

    // Create staging output buffer (CPU-readable copy of m_output)
    {
        D3D11_BUFFER_DESC desc;
        memset( &desc, 0, sizeof(desc) );
        desc.Usage = D3D11_USAGE_STAGING;
        desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
        desc.ByteWidth = static_cast<UINT>( bufferSize );

        HRESULT hr = pDevice->CreateBuffer( &desc, nullptr, m_outputCPU.ReleaseAndGetAddressOf() );
        if ( FAILED(hr) )
        {
            return hr;
        }
    }

    // Create constant buffer
    {
        D3D11_BUFFER_DESC desc;
        memset( &desc, 0, sizeof(desc) );
        desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
        desc.Usage = D3D11_USAGE_DYNAMIC;
        desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
        desc.ByteWidth = sizeof( ConstantsBC6HBC7 );

        HRESULT hr = pDevice->CreateBuffer( &desc, nullptr, m_constBuffer.ReleaseAndGetAddressOf() );
        if ( FAILED(hr) )
        {
            return hr;
        }
    }

    // Create shader resource views
    {
        D3D11_SHADER_RESOURCE_VIEW_DESC desc;
        memset( &desc, 0, sizeof(desc) );
        desc.Buffer.NumElements = static_cast<UINT>( num_blocks );
        desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;

        HRESULT hr = pDevice->CreateShaderResourceView( m_err1.Get(), &desc, m_err1SRV.ReleaseAndGetAddressOf() );
        if ( FAILED(hr) )
        {
            return hr;
        }

        hr = pDevice->CreateShaderResourceView( m_err2.Get(), &desc, m_err2SRV.ReleaseAndGetAddressOf() );
        if ( FAILED(hr) )
        {
            return hr;
        }
    }

    // Create unordered access views
    {
        D3D11_UNORDERED_ACCESS_VIEW_DESC desc;
        memset( &desc, 0, sizeof(desc) );
        desc.Buffer.NumElements = static_cast<UINT>( num_blocks );
        desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;

        HRESULT hr = pDevice->CreateUnorderedAccessView( m_output.Get(), &desc, m_outputUAV.ReleaseAndGetAddressOf() );
        if ( FAILED(hr) )
        {
            return hr;
        }

        hr = pDevice->CreateUnorderedAccessView( m_err1.Get(), &desc, m_err1UAV.ReleaseAndGetAddressOf() );
        if ( FAILED(hr) )
        {
            return hr;
        }

        hr = pDevice->CreateUnorderedAccessView( m_err2.Get(), &desc, m_err2UAV.ReleaseAndGetAddressOf() );
        if ( FAILED(hr) )
        {
            return hr;
        }
    }

    return S_OK;
}
//-------------------------------------------------------------------------------------
// Compresses one image on the GPU using the configuration set by Prepare.
//
//   srcImage   Uncompressed pixels; must match the prepared width/height and be in the
//              source format reported by GetSourceFormat().
//   destImage  Receives the compressed blocks (written through destImage.pixels); must
//              match the prepared width/height and the prepared BC format.
//
// The source is uploaded to a temporary texture, the blocks are processed in batches of
// MAX_BLOCK_BATCH by the "try mode" shaders followed by the encode shader, and the output
// buffer is copied back through the staging buffer.
//
// Returns E_INVALIDARG for null pixel pointers, E_UNEXPECTED when the images do not match
// the prepared state (or no context is available), E_POINTER when no device is available,
// or the failing D3D11 result.
_Use_decl_annotations_
HRESULT GPUCompressBC::Compress( const Image& srcImage, const Image& destImage )
{
    if ( !srcImage.pixels || !destImage.pixels )
        return E_INVALIDARG;

    if ( srcImage.width != destImage.width
         || srcImage.height != destImage.height
         || srcImage.width != m_width
         || srcImage.height != m_height
         || srcImage.format != m_srcformat
         || destImage.format != m_bcformat )
    {
        return E_UNEXPECTED;
    }

    //--- Create input texture --------------------------------------------------------
    // TODO - non-power-of-2 BCs require handling non-multiple of 4 cases

    auto pDevice = m_device.Get();
    if ( !pDevice )
        return E_POINTER;

    ScopedObject<ID3D11Texture2D> sourceTex;
    {
        D3D11_TEXTURE2D_DESC desc;
        memset( &desc, 0, sizeof(desc) );
        desc.Width = static_cast<UINT>( srcImage.width );
        desc.Height = static_cast<UINT>( srcImage.height );
        desc.MipLevels = 1;
        desc.ArraySize = 1;
        desc.Format = srcImage.format;
        desc.SampleDesc.Count = 1;
        desc.Usage = D3D11_USAGE_DEFAULT;
        desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;

        D3D11_SUBRESOURCE_DATA initData;
        initData.pSysMem = srcImage.pixels;
        initData.SysMemPitch = static_cast<DWORD>( srcImage.rowPitch );
        initData.SysMemSlicePitch = static_cast<DWORD>( srcImage.slicePitch );

        HRESULT hr = pDevice->CreateTexture2D( &desc, &initData, sourceTex.GetAddressOf() );
        if ( FAILED(hr) )
        {
            return hr;
        }
    }

    ScopedObject<ID3D11ShaderResourceView> sourceSRV;
    {
        D3D11_SHADER_RESOURCE_VIEW_DESC desc;
        memset( &desc, 0, sizeof(desc) );
        desc.Texture2D.MipLevels = 1;
        desc.Format = m_srcformat;
        desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;

        HRESULT hr = pDevice->CreateShaderResourceView( sourceTex.Get(), &desc, sourceSRV.ReleaseAndGetAddressOf() );
        if ( FAILED(hr) )
        {
            return hr;
        }
    }

    //--- Compress using DirectCompute ------------------------------------------------
    bool isbc7 = false;
    switch( m_bcformat )
    {
    case DXGI_FORMAT_BC6H_TYPELESS:
    case DXGI_FORMAT_BC6H_UF16:
    case DXGI_FORMAT_BC6H_SF16:
        break;

    case DXGI_FORMAT_BC7_TYPELESS:
    case DXGI_FORMAT_BC7_UNORM:
    case DXGI_FORMAT_BC7_UNORM_SRGB:
        isbc7 = true;
        break;

    default:
        return E_UNEXPECTED;
    }

    const UINT MAX_BLOCK_BATCH = 64;

    auto pContext = m_context.Get();
    if ( !pContext )
        return E_UNEXPECTED;

    size_t xblocks = std::max<size_t>( 1, (m_width + 3) >> 2 );
    size_t yblocks = std::max<size_t>( 1, (m_height + 3) >> 2 );

    UINT num_total_blocks = static_cast<UINT>( xblocks * yblocks );

    // Uploads a fully initialised constant block for the given mode and batch start.
    // Every field (including alpha_weight and reserved) is written so that no
    // indeterminate stack contents are copied to the GPU.
    auto updateConstants = [&]( UINT modeId, UINT firstBlock ) -> HRESULT
    {
        D3D11_MAPPED_SUBRESOURCE mapped;
        HRESULT hrMap = pContext->Map( m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
        if ( FAILED(hrMap) )
            return hrMap;

        ConstantsBC6HBC7 param;
        param.tex_width = static_cast<UINT>( srcImage.width );
        param.num_block_x = static_cast<UINT>( xblocks );
        param.format = m_bcformat;
        param.mode_id = modeId;
        param.start_block_id = firstBlock;
        param.num_total_blocks = num_total_blocks;
        param.alpha_weight = m_alphaWeight;
        param.reserved = 0;
        memcpy( mapped.pData, &param, sizeof( param ) );

        pContext->Unmap( m_constBuffer.Get(), 0 );
        return S_OK;
    };

    UINT num_blocks = num_total_blocks;
    UINT start_block_id = 0;
    while ( num_blocks > 0 )
    {
        UINT n = std::min<UINT>( num_blocks, MAX_BLOCK_BATCH );
        UINT uThreadGroupCount = n;

        HRESULT hrBatch = updateConstants( 0, start_block_id );
        if ( FAILED(hrBatch) )
        {
            // A previous batch may have left views bound; unbind like the other failure paths
            ResetContext( pContext );
            return hrBatch;
        }

        if ( isbc7 )
        {
            //--- BC7 -----------------------------------------------------------------
            // The two error buffers are ping-ponged: each pass reads the previous best
            // result from one and writes the updated result to the other.
            ID3D11ShaderResourceView* pSRVs[] = { sourceSRV.Get(), nullptr };
            RunComputeShader( pContext, m_BC7_tryMode456CS.Get(), pSRVs, 2, m_constBuffer.Get(),
                              m_err1UAV.Get(), std::max<UINT>(uThreadGroupCount / 4, 1) );

            for ( UINT i = 0; i < 3; ++i )
            {
                static const UINT modes137[] = { 1, 3, 7 };

                HRESULT hr = updateConstants( modes137[i], start_block_id );
                if ( FAILED(hr) )
                {
                    ResetContext( pContext );
                    return hr;
                }

                pSRVs[1] = (i & 1) ? m_err2SRV.Get() : m_err1SRV.Get();
                RunComputeShader( pContext, m_BC7_tryMode137CS.Get(), pSRVs, 2, m_constBuffer.Get(),
                                  (i & 1) ? m_err1UAV.Get() : m_err2UAV.Get(), uThreadGroupCount );
            }

            for ( UINT i = 0; i < 2; ++i )
            {
                static const UINT modes02[] = { 0, 2 };

                HRESULT hr = updateConstants( modes02[i], start_block_id );
                if ( FAILED(hr) )
                {
                    ResetContext( pContext );
                    return hr;
                }

                pSRVs[1] = (i & 1) ? m_err1SRV.Get() : m_err2SRV.Get();
                RunComputeShader( pContext, m_BC7_tryMode02CS.Get(), pSRVs, 2, m_constBuffer.Get(),
                                  (i & 1) ? m_err2UAV.Get() : m_err1UAV.Get(), uThreadGroupCount );
            }

            // The last try-mode pass wrote to err2
            pSRVs[1] = m_err2SRV.Get();
            RunComputeShader( pContext, m_BC7_encodeBlockCS.Get(), pSRVs, 2, m_constBuffer.Get(),
                              m_outputUAV.Get(), std::max<UINT>(uThreadGroupCount / 4, 1) );
        }
        else
        {
            //--- BC6H ----------------------------------------------------------------
            ID3D11ShaderResourceView* pSRVs[] = { sourceSRV.Get(), nullptr };
            RunComputeShader( pContext, m_BC6H_tryModeG10CS.Get(), pSRVs, 2, m_constBuffer.Get(),
                              m_err1UAV.Get(), std::max<UINT>(uThreadGroupCount / 4, 1) );

            for ( UINT i = 0; i < 10; ++i )
            {
                HRESULT hr = updateConstants( i, start_block_id );
                if ( FAILED(hr) )
                {
                    ResetContext( pContext );
                    return hr;
                }

                pSRVs[1] = (i & 1) ? m_err2SRV.Get() : m_err1SRV.Get();
                RunComputeShader( pContext, m_BC6H_tryModeLE10CS.Get(), pSRVs, 2, m_constBuffer.Get(),
                                  (i & 1) ? m_err1UAV.Get() : m_err2UAV.Get(), std::max<UINT>(uThreadGroupCount / 2, 1) );
            }

            // The last try-mode pass (i == 9) wrote to err1
            pSRVs[1] = m_err1SRV.Get();
            RunComputeShader( pContext, m_BC6H_encodeBlockCS.Get(), pSRVs, 2, m_constBuffer.Get(),
                              m_outputUAV.Get(), std::max<UINT>(uThreadGroupCount / 2, 1) );
        }

        start_block_id += n;
        num_blocks -= n;
    }

    ResetContext( pContext );

    //--- Copy output texture back to CPU ---------------------------------------------
    pContext->CopyResource( m_outputCPU.Get(), m_output.Get() );

    D3D11_MAPPED_SUBRESOURCE mapped;
    HRESULT hr = pContext->Map( m_outputCPU.Get(), 0, D3D11_MAP_READ, 0, &mapped );
    if ( SUCCEEDED(hr) )
    {
        const uint8_t *pSrc = reinterpret_cast<const uint8_t *>( mapped.pData );
        uint8_t *pDest = destImage.pixels;

        // One row of blocks in the GPU output buffer
        size_t pitch = xblocks * sizeof( BufferBC6HBC7 );

        size_t rows = std::max<size_t>( 1, ( destImage.height + 3 ) >> 2 );

        // Never read past a source row, even if the destination pitch is padded
        size_t copyBytes = std::min<size_t>( pitch, destImage.rowPitch );

        for( size_t h = 0; h < rows; ++h )
        {
            memcpy( pDest, pSrc, copyBytes );

            pSrc += pitch;
            pDest += destImage.rowPitch;
        }

        pContext->Unmap( m_outputCPU.Get(), 0 );
    }

    return hr;
}
}; // namespace

View File

@ -0,0 +1,70 @@
//-------------------------------------------------------------------------------------
// BCDirectCompute.h
//
// Direct3D 11 Compute Shader BC Compressor
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//-------------------------------------------------------------------------------------
#if defined(_MSC_VER) && (_MSC_VER > 1000)
#pragma once
#endif
#include "scoped.h"
namespace DirectX
{
// Direct3D 11 compute-shader based BC6H / BC7 block compressor.
//
// Usage order: Initialize() once per device, Prepare() for each image size / target
// format, then Compress() for each image of that size.
class GPUCompressBC
{
public:
// Creates an idle compressor (formats UNKNOWN, size 0, alpha weight 1).
GPUCompressBC();
// Verifies the device supports compute shaders, keeps the device and its immediate
// context, and creates the BC6H/BC7 compute shaders.
HRESULT Initialize( _In_ ID3D11Device* pDevice );
// Selects the target BC format and creates the GPU buffers/views sized for a
// width x height image. width and height must be non-zero; alphaWeight must not be negative.
HRESULT Prepare( _In_ size_t width, _In_ size_t height, _In_ DXGI_FORMAT format, _In_ float alphaWeight = 1.f );
// Compresses srcImage (which must be in GetSourceFormat() and match the prepared size)
// and writes the resulting blocks through destImage.pixels.
HRESULT Compress( _In_ const Image& srcImage, _In_ const Image& destImage );
// Uncompressed input format required by Compress() for the prepared target format.
DXGI_FORMAT GetSourceFormat() const { return m_srcformat; }
private:
// Target block-compressed format selected by Prepare().
DXGI_FORMAT m_bcformat;
// Required uncompressed input format for m_bcformat.
DXGI_FORMAT m_srcformat;
// Value written to the alpha_weight shader constant.
float m_alphaWeight;
// Image dimensions (in pixels) the buffers were prepared for.
size_t m_width;
size_t m_height;
// Device and immediate context obtained in Initialize().
ScopedObject<ID3D11Device> m_device;
ScopedObject<ID3D11DeviceContext> m_context;
// Two per-block scratch buffers with both UAV and SRV views; the shader passes
// alternate between reading one and writing the other.
ScopedObject<ID3D11Buffer> m_err1;
ScopedObject<ID3D11UnorderedAccessView> m_err1UAV;
ScopedObject<ID3D11ShaderResourceView> m_err1SRV;
ScopedObject<ID3D11Buffer> m_err2;
ScopedObject<ID3D11UnorderedAccessView> m_err2UAV;
ScopedObject<ID3D11ShaderResourceView> m_err2SRV;
// GPU output buffer written by the encode shaders, and its CPU-readable staging copy.
ScopedObject<ID3D11Buffer> m_output;
ScopedObject<ID3D11Buffer> m_outputCPU;
ScopedObject<ID3D11UnorderedAccessView> m_outputUAV;
// Dynamic constant buffer holding one ConstantsBC6HBC7.
ScopedObject<ID3D11Buffer> m_constBuffer;
// Compute shader library
ScopedObject<ID3D11ComputeShader> m_BC6H_tryModeG10CS;
ScopedObject<ID3D11ComputeShader> m_BC6H_tryModeLE10CS;
ScopedObject<ID3D11ComputeShader> m_BC6H_encodeBlockCS;
ScopedObject<ID3D11ComputeShader> m_BC7_tryMode456CS;
ScopedObject<ID3D11ComputeShader> m_BC7_tryMode137CS;
ScopedObject<ID3D11ComputeShader> m_BC7_tryMode02CS;
ScopedObject<ID3D11ComputeShader> m_BC7_encodeBlockCS;
};
}; // namespace

View File

@ -30,7 +30,7 @@
#include <ocidl.h>
#define DIRECTX_TEX_VERSION 110
#define DIRECTX_TEX_VERSION 120
#if defined(_MSC_VER) && (_MSC_VER<1610) && !defined(_In_reads_)
#define _Analysis_assume_(exp)
@ -484,6 +484,11 @@ namespace DirectX
_In_ DXGI_FORMAT format, _In_ DWORD compress, _In_ float alphaRef, _Out_ ScratchImage& cImages );
// Note that alphaRef is only used by BC1. 0.5f is a typical value to use
HRESULT Compress( _In_ ID3D11Device* pDevice, _In_ const Image& srcImage, _In_ DXGI_FORMAT format, _Out_ ScratchImage& image );
HRESULT Compress( _In_ ID3D11Device* pDevice, _In_ const Image* srcImages, _In_ size_t nimages, _In_ const TexMetadata& metadata,
_In_ DXGI_FORMAT format, _Out_ ScratchImage& cImages );
// DirectCompute-based compression
HRESULT Decompress( _In_ const Image& cImage, _In_ DXGI_FORMAT format, _Out_ ScratchImage& image );
HRESULT Decompress( _In_reads_(nimages) const Image* cImages, _In_ size_t nimages, _In_ const TexMetadata& metadata,
_In_ DXGI_FORMAT format, _Out_ ScratchImage& images );

View File

@ -0,0 +1,383 @@
//-------------------------------------------------------------------------------------
// DirectXTexCompressGPU.cpp
//
// DirectX Texture Library - DirectCompute-based texture compression
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkId=248926
//-------------------------------------------------------------------------------------
#include "directxtexp.h"
#include "bcdirectcompute.h"
namespace DirectX
{
//-------------------------------------------------------------------------------------
// Converts to R8G8B8A8_UNORM or R8G8B8A8_UNORM_SRGB doing any conversion logic needed
//-------------------------------------------------------------------------------------
// Produces an R8G8B8A8_UNORM (or _UNORM_SRGB when srgb is true) copy of srcImage in
// 'image'. On any failure after the scratch image has been allocated, it is released
// again before returning.
static HRESULT _ConvertToRGBA32( _In_ const Image& srcImage, _In_ ScratchImage& image, bool srgb )
{
    if ( srcImage.pixels == nullptr )
        return E_POINTER;

    const DXGI_FORMAT format = srgb ? DXGI_FORMAT_R8G8B8A8_UNORM_SRGB : DXGI_FORMAT_R8G8B8A8_UNORM;

    const HRESULT hr = image.Initialize2D( format, srcImage.width, srcImage.height, 1, 1 );
    if ( FAILED(hr) )
        return hr;

    const Image* img = image.GetImage( 0, 0, 0 );
    if ( !img || !img->pixels )
    {
        image.Release();
        return E_POINTER;
    }

    // One row of 16-byte aligned XMVECTORs used as the intermediate representation
    ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( ( sizeof(XMVECTOR) * srcImage.width ), 16 ) ) );
    if ( !scanline )
    {
        image.Release();
        return E_OUTOFMEMORY;
    }

    XMVECTOR* const row = scanline.get();

    for( size_t y = 0; y < srcImage.height; ++y )
    {
        const uint8_t* srcRow = srcImage.pixels + y * srcImage.rowPitch;
        uint8_t* destRow = img->pixels + y * img->rowPitch;

        // load -> convert -> store; a failed load or store aborts the whole conversion
        bool rowDone = _LoadScanline( row, srcImage.width, srcRow, srcImage.rowPitch, srcImage.format );
        if ( rowDone )
        {
            _ConvertScanline( row, srcImage.width, format, srcImage.format, 0 );
            rowDone = _StoreScanline( destRow, img->rowPitch, format, row, srcImage.width );
        }

        if ( !rowDone )
        {
            image.Release();
            return E_FAIL;
        }
    }

    return S_OK;
}
//-------------------------------------------------------------------------------------
// Converts to DXGI_FORMAT_R32G32B32A32_FLOAT doing any conversion logic needed
//-------------------------------------------------------------------------------------
// Produces an R32G32B32A32_FLOAT copy of srcImage in 'image'. Each destination row is
// used directly as the XMVECTOR scanline, so no intermediate buffer or store step is
// needed. The scratch image is released again on failure.
static HRESULT _ConvertToRGBAF32( const Image& srcImage, ScratchImage& image )
{
    if ( srcImage.pixels == nullptr )
        return E_POINTER;

    const HRESULT hr = image.Initialize2D( DXGI_FORMAT_R32G32B32A32_FLOAT, srcImage.width, srcImage.height, 1, 1 );
    if ( FAILED(hr) )
        return hr;

    const Image* img = image.GetImage( 0, 0, 0 );
    if ( !img || !img->pixels )
    {
        image.Release();
        return E_POINTER;
    }

    for( size_t y = 0; y < srcImage.height; ++y )
    {
        const uint8_t* srcRow = srcImage.pixels + y * srcImage.rowPitch;
        XMVECTOR* destRow = reinterpret_cast<XMVECTOR*>( img->pixels + y * img->rowPitch );

        if ( !_LoadScanline( destRow, srcImage.width, srcRow, srcImage.rowPitch, srcImage.format ) )
        {
            image.Release();
            return E_FAIL;
        }

        _ConvertScanline( destRow, srcImage.width, DXGI_FORMAT_R32G32B32A32_FLOAT, srcImage.format, 0 );
    }

    return S_OK;
}
//-------------------------------------------------------------------------------------
// Compress using GPU, converting to the proper input format for the shader if needed
//-------------------------------------------------------------------------------------
// Runs the GPU compressor on srcImage, first converting it to the compressor's
// required input format (see GPUCompressBC::GetSourceFormat) when it is in a different one.
inline static HRESULT _GPUCompress( _In_ GPUCompressBC* gpubc, _In_ const Image& srcImage, _In_ const Image& destImage )
{
    if ( !gpubc )
        return E_POINTER;

    assert( srcImage.pixels && destImage.pixels );

    const DXGI_FORMAT required = gpubc->GetSourceFormat();

    // Fast path: input is already in the required source format
    if ( srcImage.format == required )
        return gpubc->Compress( srcImage, destImage );

    // Otherwise convert into a temporary image and compress that instead
    ScratchImage converted;
    HRESULT hr = E_UNEXPECTED;

    if ( required == DXGI_FORMAT_R8G8B8A8_UNORM )
    {
        hr = _ConvertToRGBA32( srcImage, converted, false );
    }
    else if ( required == DXGI_FORMAT_R8G8B8A8_UNORM_SRGB )
    {
        hr = _ConvertToRGBA32( srcImage, converted, true );
    }
    else if ( required == DXGI_FORMAT_R32G32B32A32_FLOAT )
    {
        hr = _ConvertToRGBAF32( srcImage, converted );
    }

    if ( FAILED(hr) )
        return hr;

    const Image* img = converted.GetImage( 0, 0, 0 );
    if ( !img )
        return E_POINTER;

    return gpubc->Compress( *img, destImage );
}
//=====================================================================================
// Entry-points
//=====================================================================================
//-------------------------------------------------------------------------------------
// Compression
//-------------------------------------------------------------------------------------
// Compresses a single uncompressed image to the given (non-typeless) BC format using
// DirectCompute on pDevice. On success 'image' holds the compressed result; on failure
// after it was allocated, it is released.
_Use_decl_annotations_
HRESULT Compress( ID3D11Device* pDevice, const Image& srcImage, DXGI_FORMAT format, ScratchImage& image )
{
    if ( !pDevice )
        return E_INVALIDARG;

    // Source must be uncompressed
    if ( IsCompressed(srcImage.format) )
        return E_INVALIDARG;

    // Target must be a concrete block-compressed format
    if ( !IsCompressed(format) || IsTypeless(format) )
        return E_INVALIDARG;

    // Setup GPU compressor
    std::unique_ptr<GPUCompressBC> gpubc( new (std::nothrow) GPUCompressBC );
    if ( !gpubc )
        return E_OUTOFMEMORY;

    HRESULT hr = gpubc->Initialize( pDevice );

    if ( SUCCEEDED(hr) )
        hr = gpubc->Prepare( srcImage.width, srcImage.height, format );

    // Create workspace for result
    if ( SUCCEEDED(hr) )
        hr = image.Initialize2D( format, srcImage.width, srcImage.height, 1, 1 );

    if ( FAILED(hr) )
        return hr;

    const Image* img = image.GetImage( 0, 0, 0 );
    if ( !img )
    {
        image.Release();
        return E_POINTER;
    }

    hr = _GPUCompress( gpubc.get(), srcImage, *img );
    if ( FAILED(hr) )
    {
        image.Release();
    }

    return hr;
}
// Compresses a complete texture (array slices, mip levels or volume slices) described by
// 'metadata' to the given (non-typeless) BC format using DirectCompute on pDevice.
//
//   srcImages / nimages  Source images laid out as metadata.ComputeIndex expects.
//   metadata             Describes the source; the result uses the same layout with 'format'.
//   cImages              Receives the compressed images. It is released first, and is
//                        released again on every failure path once it has been allocated.
//
// The GPU compressor is re-prepared once per mip level, since all images of a level
// share the same dimensions.
_Use_decl_annotations_
HRESULT Compress( ID3D11Device* pDevice, const Image* srcImages, size_t nimages, const TexMetadata& metadata,
                  DXGI_FORMAT format, ScratchImage& cImages )
{
    if ( !pDevice || !srcImages || !nimages )
        return E_INVALIDARG;

    if ( !IsCompressed(format) || IsTypeless(format) )
        return E_INVALIDARG;

    cImages.Release();

    // Setup GPU compressor
    std::unique_ptr<GPUCompressBC> gpubc( new (std::nothrow) GPUCompressBC );
    if ( !gpubc )
        return E_OUTOFMEMORY;

    HRESULT hr = gpubc->Initialize( pDevice );
    if ( FAILED(hr) )
        return hr;

    // Create workspace for result
    TexMetadata mdata2 = metadata;
    mdata2.format = format;
    hr = cImages.Initialize( mdata2 );
    if ( FAILED(hr) )
        return hr;

    if ( nimages != cImages.GetImageCount() )
    {
        cImages.Release();
        return E_FAIL;
    }

    const Image* dest = cImages.GetImages();
    if ( !dest )
    {
        cImages.Release();
        return E_POINTER;
    }

    // Validates and compresses the source/destination pair at 'index'.
    // The caller is responsible for releasing cImages on failure.
    auto compressImage = [&]( size_t index ) -> HRESULT
    {
        if ( index >= nimages )
            return E_FAIL;

        assert( dest[ index ].format == format );

        const Image& src = srcImages[ index ];

        if ( src.width != dest[ index ].width || src.height != dest[ index ].height )
            return E_FAIL;

        return _GPUCompress( gpubc.get(), src, dest[ index ] );
    };

    // Process images (ordered by size)
    switch( metadata.dimension )
    {
    case TEX_DIMENSION_TEXTURE1D:
    case TEX_DIMENSION_TEXTURE2D:
        {
            size_t w = metadata.width;
            size_t h = metadata.height;

            for( size_t level = 0; level < metadata.mipLevels; ++level )
            {
                hr = gpubc->Prepare( w, h, format );

                for( size_t item = 0; SUCCEEDED(hr) && item < metadata.arraySize; ++item )
                {
                    hr = compressImage( metadata.ComputeIndex( level, item, 0 ) );
                }

                if ( FAILED(hr) )
                {
                    cImages.Release();
                    return hr;
                }

                if ( h > 1 )
                    h >>= 1;

                if ( w > 1 )
                    w >>= 1;
            }
        }
        break;

    case TEX_DIMENSION_TEXTURE3D:
        {
            size_t w = metadata.width;
            size_t h = metadata.height;
            size_t d = metadata.depth;

            for( size_t level = 0; level < metadata.mipLevels; ++level )
            {
                hr = gpubc->Prepare( w, h, format );

                for( size_t slice = 0; SUCCEEDED(hr) && slice < d; ++slice )
                {
                    hr = compressImage( metadata.ComputeIndex( level, 0, slice ) );
                }

                if ( FAILED(hr) )
                {
                    cImages.Release();
                    return hr;
                }

                if ( h > 1 )
                    h >>= 1;

                if ( w > 1 )
                    w >>= 1;

                if ( d > 1 )
                    d >>= 1;
            }
        }
        break;

    default:
        // Do not hand back an allocated-but-unfilled result
        cImages.Release();
        return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
    }

    return S_OK;
}
}; // namespace

View File

@ -406,13 +406,16 @@
<ClCompile Include="BC.cpp" />
<ClCompile Include="BC4BC5.cpp" />
<ClCompile Include="BC6HBC7.cpp" />
<ClInclude Include="BCDirectCompute.h" />
<CLInclude Include="DDS.h" />
<CLInclude Include="filters.h" />
<CLInclude Include="scoped.h" />
<CLInclude Include="DirectXTex.h" />
<CLInclude Include="DirectXTexp.h" />
<CLInclude Include="DirectXTex.inl" />
<ClCompile Include="BCDirectCompute.cpp" />
<ClCompile Include="DirectXTexCompress.cpp" />
<ClCompile Include="DirectXTexCompressGPU.cpp" />
<ClCompile Include="DirectXTexConvert.cpp" />
<ClCompile Include="DirectXTexD3D11.cpp" />
<ClCompile Include="DirectXTexDDS.cpp" />

View File

@ -32,8 +32,8 @@
<ClCompile Include="DirectXTexTGA.cpp" />
<ClCompile Include="DirectXTexUtil.cpp" />
<ClCompile Include="DirectXTexWIC.cpp" />
<ClInclude Include="BCDirectCompute.h" />
<ClCompile Include="BCDirectCompute.cpp" />
<ClCompile Include="DirectXTexCompressGPU.cpp" />
</ItemGroup>
<ItemGroup></ItemGroup>
<ItemGroup></ItemGroup>
<ItemGroup></ItemGroup>
</Project>

View File

@ -388,13 +388,16 @@
<ClCompile Include="BC.cpp" />
<ClCompile Include="BC4BC5.cpp" />
<ClCompile Include="BC6HBC7.cpp" />
<ClInclude Include="BCDirectCompute.h" />
<CLInclude Include="DDS.h" />
<CLInclude Include="filters.h" />
<CLInclude Include="scoped.h" />
<CLInclude Include="DirectXTex.h" />
<CLInclude Include="DirectXTexp.h" />
<CLInclude Include="DirectXTex.inl" />
<ClCompile Include="BCDirectCompute.cpp" />
<ClCompile Include="DirectXTexCompress.cpp" />
<ClCompile Include="DirectXTexCompressGPU.cpp" />
<ClCompile Include="DirectXTexConvert.cpp" />
<ClCompile Include="DirectXTexD3D11.cpp" />
<ClCompile Include="DirectXTexDDS.cpp" />

View File

@ -32,8 +32,8 @@
<ClCompile Include="DirectXTexTGA.cpp" />
<ClCompile Include="DirectXTexUtil.cpp" />
<ClCompile Include="DirectXTexWIC.cpp" />
<ClInclude Include="BCDirectCompute.h" />
<ClCompile Include="BCDirectCompute.cpp" />
<ClCompile Include="DirectXTexCompressGPU.cpp" />
</ItemGroup>
<ItemGroup></ItemGroup>
<ItemGroup></ItemGroup>
<ItemGroup></ItemGroup>
</Project>

View File

@ -383,19 +383,21 @@
</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemGroup />
<ItemGroup>
<CLInclude Include="BC.h" />
<ClCompile Include="BC.cpp" />
<ClCompile Include="BC4BC5.cpp" />
<ClCompile Include="BC6HBC7.cpp" />
<ClInclude Include="BCDirectCompute.h" />
<CLInclude Include="DDS.h" />
<ClInclude Include="filters.h" />
<CLInclude Include="scoped.h" />
<CLInclude Include="DirectXTex.h" />
<CLInclude Include="DirectXTexp.h" />
<CLInclude Include="DirectXTex.inl" />
<ClCompile Include="BCDirectCompute.cpp" />
<ClCompile Include="DirectXTexCompress.cpp" />
<ClCompile Include="DirectXTexCompressGPU.cpp" />
<ClCompile Include="DirectXTexConvert.cpp" />
<ClCompile Include="DirectXTexD3D11.cpp" />
<ClCompile Include="DirectXTexDDS.cpp" />

View File

@ -31,11 +31,8 @@
<ClCompile Include="DirectXTexTGA.cpp" />
<ClCompile Include="DirectXTexUtil.cpp" />
<ClCompile Include="DirectXTexWIC.cpp" />
</ItemGroup>
<ItemGroup>
</ItemGroup>
<ItemGroup>
</ItemGroup>
<ItemGroup>
<ClInclude Include="BCDirectCompute.h" />
<ClCompile Include="BCDirectCompute.cpp" />
<ClCompile Include="DirectXTexCompressGPU.cpp" />
</ItemGroup>
</Project>

View File

@ -389,13 +389,16 @@
<ClCompile Include="BC.cpp" />
<ClCompile Include="BC4BC5.cpp" />
<ClCompile Include="BC6HBC7.cpp" />
<ClInclude Include="BCDirectCompute.h" />
<CLInclude Include="DDS.h" />
<ClInclude Include="filters.h" />
<CLInclude Include="scoped.h" />
<CLInclude Include="DirectXTex.h" />
<CLInclude Include="DirectXTexp.h" />
<CLInclude Include="DirectXTex.inl" />
<ClCompile Include="BCDirectCompute.cpp" />
<ClCompile Include="DirectXTexCompress.cpp" />
<ClCompile Include="DirectXTexCompressGPU.cpp" />
<ClCompile Include="DirectXTexConvert.cpp" />
<ClCompile Include="DirectXTexD3D11.cpp" />
<ClCompile Include="DirectXTexDDS.cpp" />

View File

@ -31,11 +31,8 @@
<ClCompile Include="DirectXTexTGA.cpp" />
<ClCompile Include="DirectXTexUtil.cpp" />
<ClCompile Include="DirectXTexWIC.cpp" />
</ItemGroup>
<ItemGroup>
</ItemGroup>
<ItemGroup>
</ItemGroup>
<ItemGroup>
<ClInclude Include="BCDirectCompute.h" />
<ClCompile Include="BCDirectCompute.cpp" />
<ClCompile Include="DirectXTexCompressGPU.cpp" />
</ItemGroup>
</Project>

View File

@ -598,13 +598,16 @@
<ClCompile Include="BC.cpp" />
<ClCompile Include="BC4BC5.cpp" />
<ClCompile Include="BC6HBC7.cpp" />
<ClInclude Include="BCDirectCompute.h" />
<CLInclude Include="DDS.h" />
<CLInclude Include="filters.h" />
<CLInclude Include="scoped.h" />
<CLInclude Include="DirectXTex.h" />
<CLInclude Include="DirectXTexp.h" />
<CLInclude Include="DirectXTex.inl" />
<ClCompile Include="BCDirectCompute.cpp" />
<ClCompile Include="DirectXTexCompress.cpp" />
<ClCompile Include="DirectXTexCompressGPU.cpp" />
<ClCompile Include="DirectXTexConvert.cpp" />
<ClCompile Include="DirectXTexD3D11.cpp" />
<ClCompile Include="DirectXTexDDS.cpp" />

View File

@ -32,8 +32,8 @@
<ClCompile Include="DirectXTexTGA.cpp" />
<ClCompile Include="DirectXTexUtil.cpp" />
<ClCompile Include="DirectXTexWIC.cpp" />
<ClInclude Include="BCDirectCompute.h" />
<ClCompile Include="BCDirectCompute.cpp" />
<ClCompile Include="DirectXTexCompressGPU.cpp" />
</ItemGroup>
<ItemGroup></ItemGroup>
<ItemGroup></ItemGroup>
<ItemGroup></ItemGroup>
</Project>

View File

@ -600,13 +600,16 @@
<ClCompile Include="BC.cpp" />
<ClCompile Include="BC4BC5.cpp" />
<ClCompile Include="BC6HBC7.cpp" />
<ClInclude Include="BCDirectCompute.h" />
<CLInclude Include="DDS.h" />
<CLInclude Include="filters.h" />
<CLInclude Include="scoped.h" />
<CLInclude Include="DirectXTex.h" />
<CLInclude Include="DirectXTexp.h" />
<CLInclude Include="DirectXTex.inl" />
<ClCompile Include="BCDirectCompute.cpp" />
<ClCompile Include="DirectXTexCompress.cpp" />
<ClCompile Include="DirectXTexCompressGPU.cpp" />
<ClCompile Include="DirectXTexConvert.cpp" />
<ClCompile Include="DirectXTexD3D11.cpp" />
<ClCompile Include="DirectXTexDDS.cpp" />

View File

@ -32,8 +32,8 @@
<ClCompile Include="DirectXTexTGA.cpp" />
<ClCompile Include="DirectXTexUtil.cpp" />
<ClCompile Include="DirectXTexWIC.cpp" />
<ClInclude Include="BCDirectCompute.h" />
<ClCompile Include="BCDirectCompute.cpp" />
<ClCompile Include="DirectXTexCompressGPU.cpp" />
</ItemGroup>
<ItemGroup></ItemGroup>
<ItemGroup></ItemGroup>
<ItemGroup></ItemGroup>
</Project>

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,37 @@
@echo off
rem THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
rem ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
rem THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
rem PARTICULAR PURPOSE.
rem
rem Copyright (c) Microsoft Corporation. All rights reserved.

rem Compiles each BC6H / BC7 compute shader entry point with fxc and writes the
rem result to Compiled\<file>_<entry>.inc as a header array named <file>_<entry>.
rem Paths are relative, so run this from the folder that holds the .hlsl files.
rem Exit code: 0 when every shader compiled, 1 when at least one failed.

setlocal
set error=0

rem Make sure the output folder exists before fxc is asked to write into it.
if not exist Compiled mkdir Compiled

call :CompileShader BC7Encode TryMode456CS
call :CompileShader BC7Encode TryMode137CS
call :CompileShader BC7Encode TryMode02CS
call :CompileShader BC7Encode EncodeBlockCS

call :CompileShader BC6HEncode TryModeG10CS
call :CompileShader BC6HEncode TryModeLE10CS
call :CompileShader BC6HEncode EncodeBlockCS

echo.

if %error% == 0 (
    echo Shaders compiled ok
) else (
    echo There were shader compilation errors!
)

rem %error% is expanded when this line is parsed, i.e. before endlocal discards
rem the variable, so the failure state reaches the caller as the exit code.
endlocal & exit /b %error%

rem Subroutine: %1 = HLSL file name without extension, %2 = entry point name.
rem Sets error=1 when fxc reports a failure; the remaining shaders still compile.
:CompileShader
set fxc=fxc /nologo %1.hlsl /Tcs_4_0 /Zpc /Qstrip_reflect /Qstrip_debug /E%2 /FhCompiled\%1_%2.inc /Vn%1_%2
echo.
echo %fxc%
%fxc% || set error=1
exit /b

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -43,7 +43,13 @@ inline HANDLE safe_handle( HANDLE h ) { return (h == INVALID_HANDLE_VALUE) ? 0 :
#include <wrl.h>
template<class T> class ScopedObject : public Microsoft::WRL::ComPtr<T> {};
// ScopedObject<T> is a thin layer over Microsoft::WRL::ComPtr<T>: it adds no state
// and only re-exposes three constructors, each forwarding to the matching ComPtr<T>
// constructor. Reference-count handling is whatever ComPtr<T> implements.
template<class T> class ScopedObject : public Microsoft::WRL::ComPtr<T>
{
public:
    // Empty wrapper; forwards to ComPtr<T>'s default constructor.
    ScopedObject() : Microsoft::WRL::ComPtr<T>() {}

    // Wraps an existing interface pointer. Left implicit on purpose so that
    // existing "ScopedObject<X> obj = rawPointer;" call sites keep compiling.
    ScopedObject( T *p ) : Microsoft::WRL::ComPtr<T>(p) {}

    // Copy construction. The base must be spelled with its template argument:
    // a bare "Microsoft::WRL::ComPtr" names the template rather than the base
    // class, which is only accepted as a compiler extension.
    ScopedObject( const ScopedObject& other ) : Microsoft::WRL::ComPtr<T>( other ) {}
};
#else

View File

@ -43,8 +43,12 @@ enum OPTIONS // Note: dwOptions below assumes 32 or less options.
OPT_TA_WRAP,
OPT_TA_MIRROR,
OPT_FORCE_SINGLEPROC,
OPT_NOGPU,
OPT_MAX
};
static_assert( OPT_MAX <= 32, "dwOptions is a DWORD bitfield" );
struct SConversion
{
WCHAR szSrc [MAX_PATH];
@ -90,6 +94,7 @@ SValue g_pOptions[] =
{ L"wrap", OPT_TA_WRAP },
{ L"mirror", OPT_TA_MIRROR },
{ L"singleproc", OPT_FORCE_SINGLEPROC },
{ L"nogpu", OPT_NOGPU },
{ nullptr, 0 }
};
@ -371,6 +376,7 @@ void PrintUsage()
#ifdef _OPENMP
wprintf( L" -singleproc Do not use multi-threaded compression\n");
#endif
wprintf( L" -nogpu Do not use DirectCompute-based codecs\n");
wprintf( L"\n");
wprintf( L" <format>: ");
@ -385,6 +391,69 @@ void PrintUsage()
PrintList(15, g_pSaveFileTypes);
}
//--------------------------------------------------------------------------------------
// Creates a hardware Direct3D 11 device for the DirectCompute BC6H / BC7 codec path.
//
// d3d11.dll is loaded at run time and D3D11CreateDevice is resolved with
// GetProcAddress; the resolved entry point is kept in a function-local static so the
// lookup is not repeated once it has succeeded.
//
// pDevice - receives the created device; it is nullptr on every failure path.
//
// Returns true only when a device was created and it either reached feature level
// 11.0 or reports ComputeShaders_Plus_RawAndStructuredBuffers_Via_Shader_4_x.
//--------------------------------------------------------------------------------------
_Success_(return != false)
bool CreateDevice( _Outptr_ ID3D11Device** pDevice )
{
    if ( pDevice == nullptr )
        return false;

    *pDevice = nullptr;

    typedef HRESULT (WINAPI * LPD3D11CREATEDEVICE)( IDXGIAdapter*, D3D_DRIVER_TYPE, HMODULE, UINT32, D3D_FEATURE_LEVEL*,
                                                    UINT, UINT32, ID3D11Device**, D3D_FEATURE_LEVEL*, ID3D11DeviceContext** );

    static LPD3D11CREATEDEVICE s_DynamicD3D11CreateDevice = nullptr;

    if ( s_DynamicD3D11CreateDevice == nullptr )
    {
        HMODULE d3d11Module = LoadLibrary( L"d3d11.dll" );
        if ( d3d11Module == nullptr )
            return false;

        s_DynamicD3D11CreateDevice =
            reinterpret_cast<LPD3D11CREATEDEVICE>( GetProcAddress( d3d11Module, "D3D11CreateDevice" ) );
        if ( s_DynamicD3D11CreateDevice == nullptr )
            return false;
    }

    // Highest level first; 10.x devices are accepted only if they pass the check below.
    D3D_FEATURE_LEVEL requestedLevels[] =
    {
        D3D_FEATURE_LEVEL_11_0,
        D3D_FEATURE_LEVEL_10_1,
        D3D_FEATURE_LEVEL_10_0,
    };

#ifdef _DEBUG
    const UINT creationFlags = D3D11_CREATE_DEVICE_DEBUG;
#else
    const UINT creationFlags = 0;
#endif

    D3D_FEATURE_LEVEL achievedLevel;
    HRESULT hr = s_DynamicD3D11CreateDevice( nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, creationFlags,
                                             requestedLevels, _countof(requestedLevels),
                                             D3D11_SDK_VERSION, pDevice, &achievedLevel, nullptr );

    if ( SUCCEEDED(hr) && achievedLevel < D3D_FEATURE_LEVEL_11_0 )
    {
        D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS hwopts;
        hr = (*pDevice)->CheckFeatureSupport( D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, &hwopts, sizeof(hwopts) );

        // A failed query is treated the same as "capability not present".
        if ( FAILED(hr) )
            memset( &hwopts, 0, sizeof(hwopts) );

        if ( !hwopts.ComputeShaders_Plus_RawAndStructuredBuffers_Via_Shader_4_x )
            hr = HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
    }

    const bool created = SUCCEEDED(hr);

    // Do not hand back a device that was created but then rejected above.
    if ( !created && *pDevice != nullptr )
    {
        (*pDevice)->Release();
        *pDevice = nullptr;
    }

    return created;
}
//--------------------------------------------------------------------------------------
// Entry-point
@ -453,7 +522,7 @@ int __cdecl wmain(_In_ int argc, _In_z_count_(argc) wchar_t* argv[])
if( (OPT_NOLOGO != dwOption) && (OPT_TYPELESS_UNORM != dwOption) && (OPT_TYPELESS_FLOAT != dwOption)
&& (OPT_SEPALPHA != dwOption) && (OPT_PREMUL_ALPHA != dwOption) && (OPT_EXPAND_LUMINANCE != dwOption)
&& (OPT_TA_WRAP != dwOption) && (OPT_TA_MIRROR != dwOption)
&& (OPT_FORCE_SINGLEPROC != dwOption)
&& (OPT_FORCE_SINGLEPROC != dwOption) && (OPT_NOGPU != dwOption)
&& (OPT_SRGB != dwOption) && (OPT_SRGBI != dwOption) && (OPT_SRGBO != dwOption)
&& (OPT_HFLIP != dwOption) && (OPT_VFLIP != dwOption)
&& (OPT_DDS_DWORD_ALIGN != dwOption) && (OPT_USE_DX10 != dwOption) )
@ -640,6 +709,7 @@ int __cdecl wmain(_In_ int argc, _In_z_count_(argc) wchar_t* argv[])
bool nonpow2warn = false;
bool non4bc = false;
SConversion *pConv;
ID3D11Device* pDevice = nullptr;
for(pConv = pConversion; pConv; pConv = pConv->pNext)
{
@ -1094,8 +1164,7 @@ int __cdecl wmain(_In_ int argc, _In_z_count_(argc) wchar_t* argv[])
goto LError;
}
DWORD cflags = TEX_COMPRESS_DEFAULT;
#ifdef _OPENMP
bool bc6hbc7=false;
switch( tformat )
{
case DXGI_FORMAT_BC6H_TYPELESS:
@ -1104,12 +1173,35 @@ int __cdecl wmain(_In_ int argc, _In_z_count_(argc) wchar_t* argv[])
case DXGI_FORMAT_BC7_TYPELESS:
case DXGI_FORMAT_BC7_UNORM:
case DXGI_FORMAT_BC7_UNORM_SRGB:
if ( !(dwOptions & (1 << OPT_FORCE_SINGLEPROC) ) )
bc6hbc7=true;
{
cflags |= TEX_COMPRESS_PARALLEL;
static bool s_tryonce = false;
if ( !s_tryonce )
{
s_tryonce = true;
if ( !(dwOptions & (1 << OPT_NOGPU) ) )
{
if ( !CreateDevice( &pDevice ) )
wprintf( L"\nWARNING: DirectCompute is not available, using BC6H / BC7 CPU codec\n" );
}
else
{
wprintf( L"\nWARNING: using BC6H / BC7 CPU codec\n" );
}
}
}
break;
}
DWORD cflags = TEX_COMPRESS_DEFAULT;
#ifdef _OPENMP
if ( bc6hbc7 && !(dwOptions & (1 << OPT_FORCE_SINGLEPROC) ) )
{
cflags |= TEX_COMPRESS_PARALLEL;
}
#endif
if ( (img->width % 4) != 0 || (img->height % 4) != 0 )
@ -1117,7 +1209,14 @@ int __cdecl wmain(_In_ int argc, _In_z_count_(argc) wchar_t* argv[])
non4bc = true;
}
hr = Compress( img, nimg, info, tformat, cflags, 0.5f, *timage );
if ( bc6hbc7 && pDevice )
{
hr = Compress( pDevice, img, nimg, info, tformat, *timage );
}
else
{
hr = Compress( img, nimg, info, tformat, cflags, 0.5f, *timage );
}
if ( FAILED(hr) )
{
wprintf( L" FAILED [compress] (%x)\n", hr);
@ -1252,5 +1351,10 @@ LDone:
delete pConv;
}
if ( pDevice )
{
pDevice->Release();
}
return nReturn;
}