//------------------------------------------------------------------------------------- // DirectXSHD3D11.cpp -- C++ Spherical Harmonics Math Library // // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. // // http://go.microsoft.com/fwlink/p/?LinkId=262885 //------------------------------------------------------------------------------------- #ifdef _MSC_VER #pragma warning( disable : 4616 4619 4061 4265 4626 5039 ) // C4616/C4619 #pragma warning warnings // C4061 numerator 'identifier' in switch of enum 'enumeration' is not explicitly handled by a case label // C4265 class has virtual functions, but destructor is not virtual // C4626 assignment operator was implicitly defined as deleted // C5039 pointer or reference to potentially throwing function passed to extern C function under - EHc #pragma warning(push) #pragma warning(disable: 4365) #endif #include #ifdef _MSC_VER #pragma warning(pop) #endif #include "DirectXSH.h" #include #include #include #include #include #ifdef __clang__ #pragma clang diagnostic ignored "-Wcovered-switch-default" #pragma clang diagnostic ignored "-Wswitch-enum" #endif using namespace DirectX; using Microsoft::WRL::ComPtr; namespace { struct aligned_deleter { void operator()(void* p) { _aligned_free(p); } }; using ScopedAlignedArrayXMVECTOR = std::unique_ptr; //------------------------------------------------------------------------------------- // This code is lifted from DirectXTex http://go.microsoft.com/fwlink/?LinkId=248926 // If you need additional DXGI format support, see DirectXTexConvert.cpp //------------------------------------------------------------------------------------- #define LOAD_SCANLINE( type, func )\ if ( size >= sizeof(type) )\ {\ const type * __restrict sPtr = reinterpret_cast(pSource);\ for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\ {\ if ( dPtr >= ePtr ) break;\ *(dPtr++) = func( sPtr++ );\ }\ return true;\ }\ return false; #define LOAD_SCANLINE3( type, func, defvec )\ if ( size >= sizeof(type) )\ {\ const type * __restrict sPtr = reinterpret_cast(pSource);\ for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\ {\ XMVECTOR v = func( sPtr++ );\ if ( dPtr >= ePtr ) break;\ *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1110 );\ }\ return true;\ }\ return false; #define LOAD_SCANLINE2( type, func, defvec )\ if ( size >= sizeof(type) )\ {\ const type * __restrict sPtr = reinterpret_cast(pSource);\ for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\ {\ XMVECTOR v = func( sPtr++ );\ if ( dPtr >= ePtr ) break;\ *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1100 );\ }\ return true;\ }\ return false; #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable : 6101) #endif _Success_(return) bool LoadScanline( _Out_writes_(count) DirectX::XMVECTOR* pDestination, size_t count, _In_reads_bytes_(size) LPCVOID pSource, size_t size, DXGI_FORMAT format) { assert(pDestination && count > 0 && ((reinterpret_cast(pDestination) & 0xF) == 0)); assert(pSource && size > 0); using namespace DirectX::PackedVector; XMVECTOR* __restrict dPtr = pDestination; if (!dPtr) return false; const XMVECTOR* ePtr = pDestination + count; switch (format) { case DXGI_FORMAT_R32G32B32A32_FLOAT: { size_t msize = (size > (sizeof(XMVECTOR)*count)) ? (sizeof(XMVECTOR)*count) : size; memcpy_s(dPtr, sizeof(XMVECTOR)*count, pSource, msize); } return true; case DXGI_FORMAT_R32G32B32_FLOAT: LOAD_SCANLINE3(XMFLOAT3, XMLoadFloat3, g_XMIdentityR3) case DXGI_FORMAT_R16G16B16A16_FLOAT: LOAD_SCANLINE(XMHALF4, XMLoadHalf4) case DXGI_FORMAT_R32G32_FLOAT: LOAD_SCANLINE2(XMFLOAT2, XMLoadFloat2, g_XMIdentityR3) case DXGI_FORMAT_R11G11B10_FLOAT: LOAD_SCANLINE3(XMFLOAT3PK, XMLoadFloat3PK, g_XMIdentityR3) case DXGI_FORMAT_R16G16_FLOAT: LOAD_SCANLINE2(XMHALF2, XMLoadHalf2, g_XMIdentityR3) case DXGI_FORMAT_R32_FLOAT: if (size >= sizeof(float)) { const float* __restrict sPtr = reinterpret_cast(pSource); for (size_t icount = 0; icount < size; icount += sizeof(float)) { XMVECTOR v = XMLoadFloat(sPtr++); if (dPtr >= ePtr) break; *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v, g_XMSelect1000); } return true; } return false; case DXGI_FORMAT_R16_FLOAT: if (size >= sizeof(HALF)) { const HALF * __restrict sPtr = reinterpret_cast(pSource); for (size_t icount = 0; icount < size; icount += sizeof(HALF)) { if (dPtr >= ePtr) break; *(dPtr++) = XMVectorSet(XMConvertHalfToFloat(*sPtr++), 0.f, 0.f, 1.f); } return true; } return false; default: return false; } } #ifdef _MSC_VER #pragma warning(pop) #endif } // namespace anonymous //------------------------------------------------------------------------------------- // Projects a function represented in a cube map into spherical harmonics. // // http://msdn.microsoft.com/en-us/library/windows/desktop/ff476300.aspx //------------------------------------------------------------------------------------- _Use_decl_annotations_ HRESULT DirectX::SHProjectCubeMap( ID3D11DeviceContext *context, size_t order, ID3D11Texture2D *cubeMap, float *resultR, float *resultG, float* resultB) noexcept { if (!context || !cubeMap) return E_INVALIDARG; if (order < XM_SH_MINORDER || order > XM_SH_MAXORDER) return E_INVALIDARG; D3D11_TEXTURE2D_DESC desc; cubeMap->GetDesc(&desc); if ((desc.ArraySize != 6) || (desc.Width != desc.Height) || (desc.SampleDesc.Count > 1)) return E_FAIL; switch (desc.Format) { case DXGI_FORMAT_R32G32B32A32_FLOAT: case DXGI_FORMAT_R32G32B32_FLOAT: case DXGI_FORMAT_R16G16B16A16_FLOAT: case DXGI_FORMAT_R32G32_FLOAT: case DXGI_FORMAT_R11G11B10_FLOAT: case DXGI_FORMAT_R16G16_FLOAT: case DXGI_FORMAT_R32_FLOAT: case DXGI_FORMAT_R16_FLOAT: // See LoadScanline to support more pixel formats break; default: return E_FAIL; } //--- Create a staging resource copy (if needed) to be able to read data ID3D11Texture2D* texture = nullptr; ComPtr staging; if (!(desc.CPUAccessFlags & D3D11_CPU_ACCESS_READ)) { D3D11_TEXTURE2D_DESC sdesc = desc; sdesc.BindFlags = 0; sdesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; sdesc.Usage = D3D11_USAGE_STAGING; ComPtr device; context->GetDevice(&device); HRESULT hr = device->CreateTexture2D(&sdesc, nullptr, &staging); if (FAILED(hr)) return hr; context->CopyResource(staging.Get(), cubeMap); texture = staging.Get(); } else texture = cubeMap; assert(texture != nullptr); //--- Setup for SH projection ScopedAlignedArrayXMVECTOR scanline(reinterpret_cast(_aligned_malloc(sizeof(XMVECTOR)*desc.Width, 16))); if (!scanline) return E_OUTOFMEMORY; assert(desc.Width > 0); float fSize = static_cast(desc.Width); float fPicSize = 1.0f / fSize; // index from [0,W-1], f(0) maps to -1 + 1/W, f(W-1) maps to 1 - 1/w // linear function x*S +B, 1st constraint means B is (-1+1/W), plug into // second and solve for S: S = 2*(1-1/W)/(W-1). The old code that did // this was incorrect - but only for computing the differential solid // angle, where the final value was 1.0 instead of 1-1/w... float fB = -1.0f + 1.0f / fSize; float fS = (desc.Width > 1) ? (2.0f*(1.0f - 1.0f / fSize) / (fSize - 1.0f)) : 0.f; // clear out accumulation variables float fWt = 0.0f; if (resultR) memset(resultR, 0, sizeof(float)*order*order); if (resultG) memset(resultG, 0, sizeof(float)*order*order); if (resultB) memset(resultB, 0, sizeof(float)*order*order); float shBuff[XM_SH_MAXORDER*XM_SH_MAXORDER] = {}; float shBuffB[XM_SH_MAXORDER*XM_SH_MAXORDER] = {}; //--- Process each face of the cubemap for (UINT face = 0; face < 6; ++face) { UINT dindex = D3D11CalcSubresource(0, face, desc.MipLevels); D3D11_MAPPED_SUBRESOURCE mapped; HRESULT hr = context->Map(texture, dindex, D3D11_MAP_READ, 0, &mapped); if (FAILED(hr)) return hr; const uint8_t *pSrc = reinterpret_cast(mapped.pData); for (UINT y = 0; y < desc.Height; ++y) { XMVECTOR* ptr = scanline.get(); if (!LoadScanline(ptr, desc.Width, pSrc, mapped.RowPitch, desc.Format)) { context->Unmap(texture, dindex); return E_FAIL; } const float v = float(y) * fS + fB; XMVECTOR* pixel = ptr; for (UINT x = 0; x < desc.Width; ++x, ++pixel) { const float u = float(x) * fS + fB; float ix, iy, iz; switch (face) { case 0: // Positive X iz = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize; iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize; ix = 1.0f; break; case 1: // Negative X iz = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize; iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize; ix = -1; break; case 2: // Positive Y iz = -1.0f + (2.0f * float(y) + 1.0f) * fPicSize; iy = 1.0f; ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize; break; case 3: // Negative Y iz = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize; iy = -1.0f; ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize; break; case 4: // Positive Z iz = 1.0f; iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize; ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize; break; case 5: // Negative Z iz = -1.0f; iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize; ix = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize; break; default: ix = iy = iz = 0.f; assert(false); break; } XMVECTOR dir = XMVectorSet(ix, iy, iz, 0); dir = XMVector3Normalize(dir); const float fDiffSolid = 4.0f / ((1.0f + u * u + v * v)*sqrtf(1.0f + u * u + v * v)); fWt += fDiffSolid; XMSHEvalDirection(shBuff, order, dir); XMFLOAT3A clr; XMStoreFloat3A(&clr, *pixel); if (resultR) XMSHAdd(resultR, order, resultR, XMSHScale(shBuffB, order, shBuff, clr.x*fDiffSolid)); if (resultG) XMSHAdd(resultG, order, resultG, XMSHScale(shBuffB, order, shBuff, clr.y*fDiffSolid)); if (resultB) XMSHAdd(resultB, order, resultB, XMSHScale(shBuffB, order, shBuff, clr.z*fDiffSolid)); } pSrc += mapped.RowPitch; } context->Unmap(texture, dindex); } const float fNormProj = (4.0f*XM_PI) / fWt; if (resultR) XMSHScale(resultR, order, resultR, fNormProj); if (resultG) XMSHScale(resultG, order, resultG, fNormProj); if (resultB) XMSHScale(resultB, order, resultB, fNormProj); return S_OK; }