crossxtex/DirectXTex/DirectXTexMipmaps.cpp
2018-06-12 18:07:34 -07:00

3134 lines
108 KiB
C++

//-------------------------------------------------------------------------------------
// DirectXTexMipMaps.cpp
//
// DirectX Texture Library - Mip-map generation
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/?LinkId=248926
//-------------------------------------------------------------------------------------
#include "DirectXTexp.h"
#include "filters.h"
using namespace DirectX;
using Microsoft::WRL::ComPtr;
namespace
{
// Returns true when 'x' is a non-zero power of two (i.e. exactly one bit is set).
inline bool ispow2(_In_ size_t x)
{
    if (x == 0)
        return false;

    // Clearing the lowest set bit leaves zero only if that was the sole bit.
    return (x & (x - 1)) == 0;
}
// Counts the levels in a full 2D mip chain: each dimension is halved (never
// dropping below 1) until both width and height have reached 1.
size_t CountMips(_In_ size_t width, _In_ size_t height)
{
    size_t count = 1;

    for (; width > 1 || height > 1; ++count)
    {
        height = (height > 1) ? (height >> 1) : height;
        width = (width > 1) ? (width >> 1) : width;
    }

    return count;
}
// Counts the levels in a full volume mip chain: width, height and depth are
// each halved (never dropping below 1) until all three have reached 1.
size_t CountMips3D(_In_ size_t width, _In_ size_t height, _In_ size_t depth)
{
    size_t count = 1;

    for (; width > 1 || height > 1 || depth > 1; ++count)
    {
        height = (height > 1) ? (height >> 1) : height;
        width = (width > 1) ? (width >> 1) : width;
        depth = (depth > 1) ? (depth >> 1) : depth;
    }

    return count;
}
// Produces a bitmap in 'desiredPixelFormat' from 'src'.
//  - If 'src' already has that format, '*dest' receives 'src' with an added reference.
//  - Otherwise a WIC format converter is created and '*dest' receives a new bitmap
//    built from the converter.
// Returns E_POINTER for null arguments, E_UNEXPECTED when CanConvert fails or
// reports the conversion as unsupported, or the failing HRESULT of a WIC call.
HRESULT EnsureWicBitmapPixelFormat(
    _In_ IWICImagingFactory* pWIC,
    _In_ IWICBitmap* src,
    _In_ DWORD filter,
    _In_ const WICPixelFormatGUID& desiredPixelFormat,
    _Deref_out_ IWICBitmap** dest)
{
    if (!pWIC || !src || !dest)
        return E_POINTER;

    *dest = nullptr;

    WICPixelFormatGUID actualPixelFormat;
    HRESULT hr = src->GetPixelFormat(&actualPixelFormat);
    if (FAILED(hr))
        return hr;

    // Already in the requested format: hand back the source itself.
    if (memcmp(&actualPixelFormat, &desiredPixelFormat, sizeof(WICPixelFormatGUID)) == 0)
    {
        src->AddRef();
        *dest = src;
        return hr;
    }

    ComPtr<IWICFormatConverter> converter;
    hr = pWIC->CreateFormatConverter(converter.GetAddressOf());
    if (FAILED(hr))
        return hr;

    BOOL canConvert = FALSE;
    hr = converter->CanConvert(actualPixelFormat, desiredPixelFormat, &canConvert);
    if (FAILED(hr) || !canConvert)
        return E_UNEXPECTED;

    hr = converter->Initialize(src, desiredPixelFormat, _GetWICDither(filter), nullptr, 0, WICBitmapPaletteTypeMedianCut);
    if (FAILED(hr))
        return hr;

    return pWIC->CreateBitmapFromSource(converter.Get(), WICBitmapCacheOnDemand, dest);
}
}
namespace DirectX
{
// Validates or computes a 2D mip level count.
//  - mipLevels == 0 : replaced with the full chain length for width x height.
//  - mipLevels == 1 : accepted as-is.
//  - mipLevels  > 1 : accepted only if it does not exceed the full chain length.
// Returns false only when the requested count is too large.
bool _CalculateMipLevels(_In_ size_t width, _In_ size_t height, _Inout_ size_t& mipLevels)
{
    if (mipLevels == 0)
    {
        mipLevels = CountMips(width, height);
        return true;
    }

    if (mipLevels == 1)
    {
        mipLevels = 1;
        return true;
    }

    const size_t fullChain = CountMips(width, height);
    return !(mipLevels > fullChain);
}
// Validates or computes a volume (3D) mip level count.
//  - mipLevels == 0 : replaced with the full chain length for width x height x depth.
//  - mipLevels == 1 : accepted as-is.
//  - mipLevels  > 1 : accepted only if it does not exceed the full chain length.
// Returns false only when the requested count is too large.
bool _CalculateMipLevels3D(_In_ size_t width, _In_ size_t height, _In_ size_t depth, _Inout_ size_t& mipLevels)
{
    if (mipLevels == 0)
    {
        mipLevels = CountMips3D(width, height, depth);
        return true;
    }

    if (mipLevels == 1)
    {
        mipLevels = 1;
        return true;
    }

    const size_t fullChain = CountMips3D(width, height, depth);
    return !(mipLevels > fullChain);
}
//--- Resizing color and alpha channels separately using WIC ---
// Resizes 'original' to newWidth x newHeight with the color and alpha channels
// scaled separately, then writes the result into img->pixels.
//
// Steps performed below:
//   1. Query the pixel format of 'original' and pick two working formats from its
//      bits-per-pixel: a color-only format and a color+alpha format.
//   2. Convert + scale a color-only copy.
//   3. Convert + scale a color+alpha copy.
//   4. For every pixel, copy the color bytes from (2) over the color bytes of (3).
//   5. Convert the merged bitmap back to the original pixel format and copy it out
//      using img->rowPitch / img->slicePitch.
//
// NOTE(review): the purpose is presumably to keep alpha from influencing the
// scaled color values (TEX_FILTER_SEPARATE_ALPHA) - confirm against the caller's docs.
//
// Returns E_POINTER for null arguments or null locked data pointers, E_INVALIDARG
// if a computed pixel offset falls outside a locked buffer, or the failing HRESULT
// of any WIC call.
HRESULT _ResizeSeparateColorAndAlpha(
_In_ IWICImagingFactory* pWIC,
_In_ bool iswic2,
_In_ IWICBitmap* original,
_In_ size_t newWidth,
_In_ size_t newHeight,
_In_ DWORD filter,
_Inout_ const Image* img)
{
if (!pWIC || !original || !img)
return E_POINTER;
const WICBitmapInterpolationMode interpolationMode = _GetWICInterp(filter);
// 'desiredPixelFormat' is the format of the input; the final output is converted back to it.
WICPixelFormatGUID desiredPixelFormat = GUID_WICPixelFormatUndefined;
HRESULT hr = original->GetPixelFormat(&desiredPixelFormat);
// colorBytesInPixel   : bytes of actual color data copied per pixel during the merge
// colorBytesPerPixel  : byte stride between pixels in the color-only bitmap
// colorWithAlphaBytesPerPixel : byte stride between pixels in the color+alpha bitmap
size_t colorBytesInPixel = 0;
size_t colorBytesPerPixel = 0;
size_t colorWithAlphaBytesPerPixel = 0;
WICPixelFormatGUID colorPixelFormat = GUID_WICPixelFormatUndefined;
WICPixelFormatGUID colorWithAlphaPixelFormat = GUID_WICPixelFormatUndefined;
if (SUCCEEDED(hr))
{
ComPtr<IWICComponentInfo> componentInfo;
hr = pWIC->CreateComponentInfo(desiredPixelFormat, componentInfo.GetAddressOf());
ComPtr<IWICPixelFormatInfo> pixelFormatInfo;
if (SUCCEEDED(hr))
{
hr = componentInfo.As(&pixelFormatInfo);
}
UINT bitsPerPixel = 0;
if (SUCCEEDED(hr))
{
hr = pixelFormatInfo->GetBitsPerPixel(&bitsPerPixel);
}
if (SUCCEEDED(hr))
{
if (bitsPerPixel <= 32)
{
// Up to 32 bpp: work in 24bpp BGR (color) and 32bpp BGRA (color+alpha)
colorBytesInPixel = colorBytesPerPixel = 3;
colorPixelFormat = GUID_WICPixelFormat24bppBGR;
colorWithAlphaBytesPerPixel = 4;
colorWithAlphaPixelFormat = GUID_WICPixelFormat32bppBGRA;
}
else
{
// Wider formats: work in floating-point RGB(A). The 96bpp RGB float format is only
// selected when compiled for a platform that has it AND 'iswic2' is set; otherwise
// the 128bpp RGB float format is used, where only 12 of each 16 bytes are color.
#if(_WIN32_WINNT >= _WIN32_WINNT_WIN8) || defined(_WIN7_PLATFORM_UPDATE)
if (iswic2)
{
colorBytesInPixel = colorBytesPerPixel = 12;
colorPixelFormat = GUID_WICPixelFormat96bppRGBFloat;
}
else
#else
UNREFERENCED_PARAMETER(iswic2);
#endif
{
colorBytesInPixel = 12;
colorBytesPerPixel = 16;
colorPixelFormat = GUID_WICPixelFormat128bppRGBFloat;
}
colorWithAlphaBytesPerPixel = 16;
colorWithAlphaPixelFormat = GUID_WICPixelFormat128bppRGBAFloat;
}
}
}
// Resize color only image (no alpha channel)
ComPtr<IWICBitmap> resizedColor;
if (SUCCEEDED(hr))
{
ComPtr<IWICBitmapScaler> colorScaler;
hr = pWIC->CreateBitmapScaler(colorScaler.GetAddressOf());
if (SUCCEEDED(hr))
{
ComPtr<IWICBitmap> converted;
hr = EnsureWicBitmapPixelFormat(pWIC, original, filter, colorPixelFormat, converted.GetAddressOf());
if (SUCCEEDED(hr))
{
hr = colorScaler->Initialize(converted.Get(), static_cast<UINT>(newWidth), static_cast<UINT>(newHeight), interpolationMode);
}
}
if (SUCCEEDED(hr))
{
ComPtr<IWICBitmap> resized;
hr = pWIC->CreateBitmapFromSource(colorScaler.Get(), WICBitmapCacheOnDemand, resized.GetAddressOf());
if (SUCCEEDED(hr))
{
// Re-check the format after scaling and convert back if the scaled bitmap's format differs
hr = EnsureWicBitmapPixelFormat(pWIC, resized.Get(), filter, colorPixelFormat, resizedColor.GetAddressOf());
}
}
}
// Resize color+alpha image
ComPtr<IWICBitmap> resizedColorWithAlpha;
if (SUCCEEDED(hr))
{
ComPtr<IWICBitmapScaler> colorWithAlphaScaler;
hr = pWIC->CreateBitmapScaler(colorWithAlphaScaler.GetAddressOf());
if (SUCCEEDED(hr))
{
ComPtr<IWICBitmap> converted;
hr = EnsureWicBitmapPixelFormat(pWIC, original, filter, colorWithAlphaPixelFormat, converted.GetAddressOf());
if (SUCCEEDED(hr))
{
hr = colorWithAlphaScaler->Initialize(converted.Get(), static_cast<UINT>(newWidth), static_cast<UINT>(newHeight), interpolationMode);
}
}
if (SUCCEEDED(hr))
{
ComPtr<IWICBitmap> resized;
hr = pWIC->CreateBitmapFromSource(colorWithAlphaScaler.Get(), WICBitmapCacheOnDemand, resized.GetAddressOf());
if (SUCCEEDED(hr))
{
hr = EnsureWicBitmapPixelFormat(pWIC, resized.Get(), filter, colorWithAlphaPixelFormat, resizedColorWithAlpha.GetAddressOf());
}
}
}
// Merge pixels (copying color channels from color only image to color+alpha image)
if (SUCCEEDED(hr))
{
ComPtr<IWICBitmapLock> colorLock;
ComPtr<IWICBitmapLock> colorWithAlphaLock;
hr = resizedColor->Lock(nullptr, WICBitmapLockRead, colorLock.GetAddressOf());
if (SUCCEEDED(hr))
{
hr = resizedColorWithAlpha->Lock(nullptr, WICBitmapLockWrite, colorWithAlphaLock.GetAddressOf());
}
if (SUCCEEDED(hr))
{
// Destination (color+alpha) buffer: pointer, total size and row stride
WICInProcPointer colorWithAlphaData = nullptr;
UINT colorWithAlphaSizeInBytes = 0;
UINT colorWithAlphaStride = 0;
hr = colorWithAlphaLock->GetDataPointer(&colorWithAlphaSizeInBytes, &colorWithAlphaData);
if (SUCCEEDED(hr))
{
if (!colorWithAlphaData)
{
hr = E_POINTER;
}
else
{
hr = colorWithAlphaLock->GetStride(&colorWithAlphaStride);
}
}
// Source (color-only) buffer: pointer, total size and row stride
WICInProcPointer colorData = nullptr;
UINT colorSizeInBytes = 0;
UINT colorStride = 0;
if (SUCCEEDED(hr))
{
hr = colorLock->GetDataPointer(&colorSizeInBytes, &colorData);
if (SUCCEEDED(hr))
{
if (!colorData)
{
hr = E_POINTER;
}
else
{
hr = colorLock->GetStride(&colorStride);
}
}
}
// Both loops stop as soon as 'hr' becomes a failure code
for (size_t j = 0; SUCCEEDED(hr) && j < newHeight; j++)
{
for (size_t i = 0; SUCCEEDED(hr) && i < newWidth; i++)
{
size_t colorWithAlphaIndex = (j * colorWithAlphaStride) + (i * colorWithAlphaBytesPerPixel);
size_t colorIndex = (j * colorStride) + (i * colorBytesPerPixel);
// Reject any pixel whose bytes would fall outside either locked buffer
if (((colorWithAlphaIndex + colorBytesInPixel) > colorWithAlphaSizeInBytes)
|| ((colorIndex + colorBytesPerPixel) > colorSizeInBytes))
{
hr = E_INVALIDARG;
}
else
{
// Only the leading color bytes are overwritten; the remaining bytes of the
// destination pixel are left as produced by the color+alpha resize.
#pragma warning( suppress : 26014 6386 ) // No overflow possible here
memcpy_s(colorWithAlphaData + colorWithAlphaIndex, colorWithAlphaBytesPerPixel, colorData + colorIndex, colorBytesInPixel);
}
}
}
}
}
// Convert the merged bitmap back to the input's pixel format and copy it into the target image
if (SUCCEEDED(hr))
{
ComPtr<IWICBitmap> wicBitmap;
hr = EnsureWicBitmapPixelFormat(pWIC, resizedColorWithAlpha.Get(), filter, desiredPixelFormat, wicBitmap.GetAddressOf());
if (SUCCEEDED(hr))
{
hr = wicBitmap->CopyPixels(nullptr, static_cast<UINT>(img->rowPitch), static_cast<UINT>(img->slicePitch), img->pixels);
}
}
return hr;
}
}
namespace
{
//--- determine when to use WIC vs. non-WIC paths ---
// Decides whether mip generation for 'format' with the given 'filter' flags should
// go through the WIC bitmap scaler (true) or the custom filtering code (false).
// Explicit FORCE flags win (NON_WIC is checked before WIC); otherwise sRGB handling
// and the per-filter restrictions below select the non-WIC path.
bool UseWICFiltering(_In_ DXGI_FORMAT format, _In_ DWORD filter)
{
    // Explicit flag indicates use of non-WIC code paths
    if (filter & TEX_FILTER_FORCE_NON_WIC)
        return false;

    // Explicit flag to use WIC code paths, skips all the case checks below
    if (filter & TEX_FILTER_FORCE_WIC)
        return true;

    // Use non-WIC code paths for sRGB correct filtering
    if (IsSRGB(format) || (filter & TEX_FILTER_SRGB))
        return false;

#if defined(_XBOX_ONE) && defined(_TITLE)
    // Use non-WIC code paths as these conversions are not supported by Xbox One XDK
    if (format == DXGI_FORMAT_R16G16B16A16_FLOAT
        || format == DXGI_FORMAT_R16_FLOAT)
        return false;
#endif

    static_assert(TEX_FILTER_POINT == 0x100000, "TEX_FILTER_ flag values don't match TEX_FILTER_MASK");

    switch (filter & TEX_FILTER_MASK)
    {
    case TEX_FILTER_TRIANGLE:
        // WIC does not implement this filter
        return false;

    case TEX_FILTER_LINEAR:
        // WIC only supports 'clamp' semantics (MIRROR is equivalent to clamp for linear),
        // and the WIC bitmap scaler is avoided for Linear filtering of XR/HDR formats.
        return !(filter & TEX_FILTER_WRAP) && !(BitsPerColor(format) > 8);

    case TEX_FILTER_CUBIC:
        // WIC only supports 'clamp' semantics, and the WIC bitmap scaler is avoided
        // for Cubic filtering of XR/HDR formats.
        return !(filter & (TEX_FILTER_WRAP | TEX_FILTER_MIRROR)) && !(BitsPerColor(format) > 8);

    default:
        return true;
    }
}
//--- mipmap (1D/2D) generation using WIC image scalar ---
// Generates mip levels 1..levels-1 for array item 'item' of 'mipChain' by scaling
// 'baseImage' with the WIC bitmap scaler.
//
//   baseImage : source image for level 0 (must have non-null pixels)
//   filter    : TEX_FILTER flags; selects interpolation/dither and, together with
//               the format's transparency support, the separate-alpha path
//   levels    : total number of mip levels (asserted > 1)
//   pfGUID    : WIC pixel format matching the image data
//   mipChain  : destination chain; pixel memory of its images is written
//   item      : array item index within the chain
//
// Every level is produced by scaling the full-size source bitmap (not the previous level).
// Returns E_POINTER for missing pixel data/images, E_NOINTERFACE when no WIC factory
// is available, E_UNEXPECTED when the scaler's output format cannot be converted back,
// or the failing HRESULT of a WIC call.
HRESULT GenerateMipMapsUsingWIC(
_In_ const Image& baseImage,
_In_ DWORD filter,
_In_ size_t levels,
_In_ const WICPixelFormatGUID& pfGUID,
_In_ const ScratchImage& mipChain,
_In_ size_t item)
{
assert(levels > 1);
if (!baseImage.pixels || !mipChain.GetPixels())
return E_POINTER;
bool iswic2 = false;
IWICImagingFactory* pWIC = GetWICFactory(iswic2);
if (!pWIC)
return E_NOINTERFACE;
size_t width = baseImage.width;
size_t height = baseImage.height;
// Wrap the caller's base image memory in a WIC bitmap that serves as the scaler source
ComPtr<IWICBitmap> source;
HRESULT hr = pWIC->CreateBitmapFromMemory(static_cast<UINT>(width), static_cast<UINT>(height), pfGUID,
static_cast<UINT>(baseImage.rowPitch), static_cast<UINT>(baseImage.slicePitch),
baseImage.pixels, source.GetAddressOf());
if (FAILED(hr))
return hr;
// Copy base image to top miplevel
const Image *img0 = mipChain.GetImage(0, item, 0);
if (!img0)
return E_POINTER;
uint8_t* pDest = img0->pixels;
if (!pDest)
return E_POINTER;
const uint8_t *pSrc = baseImage.pixels;
// Row-by-row copy because source and destination row pitches may differ;
// only the smaller of the two pitches is copied per row.
for (size_t h = 0; h < height; ++h)
{
size_t msize = std::min<size_t>(img0->rowPitch, baseImage.rowPitch);
memcpy_s(pDest, img0->rowPitch, pSrc, msize);
pSrc += baseImage.rowPitch;
pDest += img0->rowPitch;
}
// Ask WIC whether this pixel format supports transparency; this gates the separate-alpha path
ComPtr<IWICComponentInfo> componentInfo;
hr = pWIC->CreateComponentInfo(pfGUID, componentInfo.GetAddressOf());
if (FAILED(hr))
return hr;
ComPtr<IWICPixelFormatInfo2> pixelFormatInfo;
hr = componentInfo.As(&pixelFormatInfo);
if (FAILED(hr))
return hr;
BOOL supportsTransparency = FALSE;
hr = pixelFormatInfo->SupportsTransparency(&supportsTransparency);
if (FAILED(hr))
return hr;
// Resize base image to each target mip level
for (size_t level = 1; level < levels; ++level)
{
const Image *img = mipChain.GetImage(level, item, 0);
if (!img)
return E_POINTER;
// Halve each dimension for this level, never dropping below 1
if (height > 1)
height >>= 1;
if (width > 1)
width >>= 1;
assert(img->width == width && img->height == height && img->format == baseImage.format);
if ((filter & TEX_FILTER_SEPARATE_ALPHA) && supportsTransparency)
{
hr = _ResizeSeparateColorAndAlpha(pWIC, iswic2, source.Get(), width, height, filter, img);
if (FAILED(hr))
return hr;
}
else
{
ComPtr<IWICBitmapScaler> scaler;
hr = pWIC->CreateBitmapScaler(scaler.GetAddressOf());
if (FAILED(hr))
return hr;
hr = scaler->Initialize(source.Get(), static_cast<UINT>(width), static_cast<UINT>(height), _GetWICInterp(filter));
if (FAILED(hr))
return hr;
WICPixelFormatGUID pfScaler;
hr = scaler->GetPixelFormat(&pfScaler);
if (FAILED(hr))
return hr;
if (memcmp(&pfScaler, &pfGUID, sizeof(WICPixelFormatGUID)) == 0)
{
// Scaler output already matches the requested format: copy straight into the mip image
hr = scaler->CopyPixels(nullptr, static_cast<UINT>(img->rowPitch), static_cast<UINT>(img->slicePitch), img->pixels);
if (FAILED(hr))
return hr;
}
else
{
// The WIC bitmap scaler is free to return a different pixel format than the source image, so here we
// convert it back
ComPtr<IWICFormatConverter> FC;
hr = pWIC->CreateFormatConverter(FC.GetAddressOf());
if (FAILED(hr))
return hr;
BOOL canConvert = FALSE;
hr = FC->CanConvert(pfScaler, pfGUID, &canConvert);
if (FAILED(hr) || !canConvert)
{
return E_UNEXPECTED;
}
hr = FC->Initialize(scaler.Get(), pfGUID, _GetWICDither(filter), nullptr, 0, WICBitmapPaletteTypeMedianCut);
if (FAILED(hr))
return hr;
hr = FC->CopyPixels(nullptr, static_cast<UINT>(img->rowPitch), static_cast<UINT>(img->slicePitch), img->pixels);
if (FAILED(hr))
return hr;
}
}
}
return S_OK;
}
//-------------------------------------------------------------------------------------
// Generate (1D/2D) mip-map helpers (custom filtering)
//-------------------------------------------------------------------------------------
// Initializes 'mipChain' from 'mdata' and copies every base image into mip level 0
// of the matching array item.
// Returns E_INVALIDARG for a null/empty input array, the failure code of
// ScratchImage::Initialize, or E_POINTER (after releasing the chain) when a
// destination image or its pixel buffer is missing.
HRESULT Setup2DMips(
    _In_reads_(nimages) const Image* baseImages,
    _In_ size_t nimages,
    _In_ const TexMetadata& mdata,
    _Out_ ScratchImage& mipChain)
{
    if (!baseImages || !nimages)
        return E_INVALIDARG;

    assert(mdata.mipLevels > 1);
    assert(mdata.arraySize == nimages);
    assert(mdata.depth == 1 && mdata.dimension != TEX_DIMENSION_TEXTURE3D);
    assert(mdata.width == baseImages[0].width);
    assert(mdata.height == baseImages[0].height);
    assert(mdata.format == baseImages[0].format);

    const HRESULT hr = mipChain.Initialize(mdata);
    if (FAILED(hr))
        return hr;

    // Copy base image(s) to top of mip chain
    for (size_t index = 0; index < nimages; ++index)
    {
        const Image& baseImage = baseImages[index];

        const Image* top = mipChain.GetImage(0, index, 0);
        if (!top)
        {
            mipChain.Release();
            return E_POINTER;
        }

        assert(baseImage.format == top->format);

        uint8_t* dstRow = top->pixels;
        if (!dstRow)
        {
            mipChain.Release();
            return E_POINTER;
        }

        const uint8_t* srcRow = baseImage.pixels;
        const size_t srcPitch = baseImage.rowPitch;
        const size_t dstPitch = top->rowPitch;

        // Pitches may differ; copy only the bytes both rows can hold.
        const size_t copyBytes = std::min<size_t>(dstPitch, srcPitch);

        for (size_t y = 0; y < mdata.height; ++y, srcRow += srcPitch, dstRow += dstPitch)
        {
            memcpy_s(dstRow, dstPitch, srcRow, copyBytes);
        }
    }

    return S_OK;
}
//--- 2D Point Filter ---
// Fills mip levels 1..levels-1 of array item 'item' using point sampling.
// Each level is sampled from the level directly above it; level 0 must already be
// present in 'mipChain' (see Setup2DMips).
// Returns E_INVALIDARG when the chain has no images, E_OUTOFMEMORY when scratch
// space cannot be allocated, E_POINTER when an image is missing, or E_FAIL when a
// scanline load/store fails.
HRESULT Generate2DMipsPointFilter(size_t levels, const ScratchImage& mipChain, size_t item)
{
    if (!mipChain.GetImages())
        return E_INVALIDARG;

    assert(levels > 1);

    size_t srcWidth = mipChain.GetMetadata().width;
    size_t srcHeight = mipChain.GetMetadata().height;

    // Scratch space for two scanlines: the output row followed by the loaded source row
    ScopedAlignedArrayXMVECTOR scratch(static_cast<XMVECTOR*>(_aligned_malloc((sizeof(XMVECTOR)*srcWidth * 2), 16)));
    if (!scratch)
        return E_OUTOFMEMORY;

    XMVECTOR* const outRow = scratch.get();
    XMVECTOR* const inRow = outRow + srcWidth;

    for (size_t level = 1; level < levels; ++level)
    {
#ifdef _DEBUG
        memset(inRow, 0xCD, sizeof(XMVECTOR)*srcWidth);
#endif

        const Image* src = mipChain.GetImage(level - 1, item, 0);
        const Image* dest = mipChain.GetImage(level, item, 0);
        if (!src || !dest)
            return E_POINTER;

        const uint8_t* srcBits = src->pixels;
        uint8_t* dstBits = dest->pixels;
        const size_t srcPitch = src->rowPitch;

        const size_t dstWidth = (srcWidth > 1) ? (srcWidth >> 1) : 1;
        const size_t dstHeight = (srcHeight > 1) ? (srcHeight >> 1) : 1;

        // 16.16 fixed-point step through the source per destination texel
        const size_t xStep = (srcWidth << 16) / dstWidth;
        const size_t yStep = (srcHeight << 16) / dstHeight;

        size_t loadedAt = size_t(-1);
        size_t srcY = 0;

        for (size_t y = 0; y < dstHeight; ++y, srcY += yStep)
        {
            // Reload only when the integer source row changes
            if ((srcY >> 16) != (loadedAt >> 16))
            {
                if (!_LoadScanline(inRow, srcWidth, srcBits + (srcPitch * (srcY >> 16)), srcPitch, src->format))
                    return E_FAIL;
                loadedAt = srcY;
            }

            size_t srcX = 0;
            for (size_t x = 0; x < dstWidth; ++x, srcX += xStep)
            {
                outRow[x] = inRow[srcX >> 16];
            }

            if (!_StoreScanline(dstBits, dest->rowPitch, dest->format, outRow, dstWidth))
                return E_FAIL;
            dstBits += dest->rowPitch;
        }

        if (srcHeight > 1)
            srcHeight >>= 1;
        if (srcWidth > 1)
            srcWidth >>= 1;
    }

    return S_OK;
}
//--- 2D Box Filter ---
// Fills mip levels 1..levels-1 of array item 'item' using a 2x2 box filter.
// Each level is computed from the level directly above it; level 0 must already be
// present in 'mipChain' (see Setup2DMips). Both base dimensions must be powers of two.
//
//   levels   : total number of mip levels (asserted > 1)
//   filter   : TEX_FILTER flags forwarded to the linear scanline load/store helpers
//   mipChain : chain whose image pixel memory is read and written
//   item     : array item index within the chain
//
// Returns E_INVALIDARG when the chain has no images, E_FAIL for non-power-of-two
// dimensions or a failed scanline load/store, E_OUTOFMEMORY when scratch space
// cannot be allocated, or E_POINTER when an image is missing.
HRESULT Generate2DMipsBoxFilter(size_t levels, DWORD filter, const ScratchImage& mipChain, size_t item)
{
    if (!mipChain.GetImages())
        return E_INVALIDARG;

    // This assumes that the base image is already placed into the mipChain at the top level... (see Setup2DMips)
    assert(levels > 1);

    size_t width = mipChain.GetMetadata().width;
    size_t height = mipChain.GetMetadata().height;

    if (!ispow2(width) || !ispow2(height))
        return E_FAIL;

    // Allocate temporary space (3 scanlines): output row plus two source rows
    ScopedAlignedArrayXMVECTOR scanline(static_cast<XMVECTOR*>(_aligned_malloc((sizeof(XMVECTOR)*width * 3), 16)));
    if (!scanline)
        return E_OUTOFMEMORY;

    XMVECTOR* target = scanline.get();

    XMVECTOR* urow0 = target + width;
    XMVECTOR* urow1 = target + width * 2;

    // Resize base image to each target mip level
    for (size_t level = 1; level < levels; ++level)
    {
        // With a single source row left, both vertical taps read the same row
        // (and the second row is no longer loaded below).
        if (height <= 1)
        {
            urow1 = urow0;
        }

        // The right-hand taps must be derived from the CURRENT row pointers on every
        // level. Computing them once up front left urow3 pointing into the second
        // scanline buffer after urow1 had been redirected to urow0 (height <= 1 with
        // width > 1), so one of the four averaged taps read stale or uninitialized data.
        // With a single source column left, the horizontal taps collapse onto column 0.
        const size_t xstep = (width > 1) ? 1 : 0;
        const XMVECTOR* urow2 = urow0 + xstep;
        const XMVECTOR* urow3 = urow1 + xstep;

        // 2D box filter
        const Image* src = mipChain.GetImage(level - 1, item, 0);
        const Image* dest = mipChain.GetImage(level, item, 0);

        if (!src || !dest)
            return E_POINTER;

        const uint8_t* pSrc = src->pixels;
        uint8_t* pDest = dest->pixels;

        size_t rowPitch = src->rowPitch;

        size_t nwidth = (width > 1) ? (width >> 1) : 1;
        size_t nheight = (height > 1) ? (height >> 1) : 1;

        for (size_t y = 0; y < nheight; ++y)
        {
            if (!_LoadScanlineLinear(urow0, width, pSrc, rowPitch, src->format, filter))
                return E_FAIL;
            pSrc += rowPitch;

            // Load the second source row only when it is distinct from the first
            if (urow0 != urow1)
            {
                if (!_LoadScanlineLinear(urow1, width, pSrc, rowPitch, src->format, filter))
                    return E_FAIL;
                pSrc += rowPitch;
            }

            for (size_t x = 0; x < nwidth; ++x)
            {
                size_t x2 = x << 1;

                AVERAGE4(target[x], urow0[x2], urow1[x2], urow2[x2], urow3[x2]);
            }

            if (!_StoreScanlineLinear(pDest, dest->rowPitch, dest->format, target, nwidth, filter))
                return E_FAIL;
            pDest += dest->rowPitch;
        }

        if (height > 1)
            height >>= 1;

        if (width > 1)
            width >>= 1;
    }

    return S_OK;
}
//--- 2D Linear Filter ---
// Fills mip levels 1..levels-1 of array item 'item' using a bilinear filter.
// Each level is computed from the level directly above it; level 0 must already be
// present in 'mipChain' (see Setup2DMips).
//
//   levels   : total number of mip levels (asserted > 1)
//   filter   : TEX_FILTER flags; WRAP_U / WRAP_V are passed to the filter-table
//              builder, and the flags are forwarded to the scanline load/store helpers
//   mipChain : chain whose image pixel memory is read and written
//   item     : array item index within the chain
//
// Returns E_INVALIDARG when the chain has no images, E_OUTOFMEMORY when scratch
// space cannot be allocated, E_POINTER when an image is missing, or E_FAIL when a
// scanline load/store fails.
HRESULT Generate2DMipsLinearFilter(size_t levels, DWORD filter, const ScratchImage& mipChain, size_t item)
{
if (!mipChain.GetImages())
return E_INVALIDARG;
// This assumes that the base image is already placed into the mipChain at the top level... (see Setup2DMips)
assert(levels > 1);
size_t width = mipChain.GetMetadata().width;
size_t height = mipChain.GetMetadata().height;
// Allocate temporary space (3 scanlines, plus X and Y filters)
ScopedAlignedArrayXMVECTOR scanline(static_cast<XMVECTOR*>(_aligned_malloc((sizeof(XMVECTOR)*width * 3), 16)));
if (!scanline)
return E_OUTOFMEMORY;
// One allocation holds both filter tables: 'width' entries for X followed by 'height' entries for Y
std::unique_ptr<LinearFilter[]> lf(new (std::nothrow) LinearFilter[width + height]);
if (!lf)
return E_OUTOFMEMORY;
LinearFilter* lfX = lf.get();
LinearFilter* lfY = lf.get() + width;
// Scratch layout: output row, then two cached source rows
XMVECTOR* target = scanline.get();
XMVECTOR* row0 = target + width;
XMVECTOR* row1 = target + width * 2;
// Resize base image to each target mip level
for (size_t level = 1; level < levels; ++level)
{
// 2D linear filter
const Image* src = mipChain.GetImage(level - 1, item, 0);
const Image* dest = mipChain.GetImage(level, item, 0);
if (!src || !dest)
return E_POINTER;
const uint8_t* pSrc = src->pixels;
uint8_t* pDest = dest->pixels;
size_t rowPitch = src->rowPitch;
// Rebuild the X and Y filter tables for this level's source -> destination sizes
size_t nwidth = (width > 1) ? (width >> 1) : 1;
_CreateLinearFilter(width, nwidth, (filter & TEX_FILTER_WRAP_U) != 0, lfX);
size_t nheight = (height > 1) ? (height >> 1) : 1;
_CreateLinearFilter(height, nheight, (filter & TEX_FILTER_WRAP_V) != 0, lfY);
#ifdef _DEBUG
memset(row0, 0xCD, sizeof(XMVECTOR)*width);
memset(row1, 0xDD, sizeof(XMVECTOR)*width);
#endif
// u0/u1 track which source rows are currently held in row0/row1 (size_t(-1) = none)
size_t u0 = size_t(-1);
size_t u1 = size_t(-1);
for (size_t y = 0; y < nheight; ++y)
{
auto& toY = lfY[y];
if (toY.u0 != u0)
{
if (toY.u0 != u1)
{
// Needed row is not cached anywhere: load it from the source image
u0 = toY.u0;
if (!_LoadScanlineLinear(row0, width, pSrc + (rowPitch * u0), rowPitch, src->format, filter))
return E_FAIL;
}
else
{
// Needed row is already cached in row1: swap buffers instead of reloading
u0 = u1;
u1 = size_t(-1);
std::swap(row0, row1);
}
}
if (toY.u1 != u1)
{
u1 = toY.u1;
if (!_LoadScanlineLinear(row1, width, pSrc + (rowPitch * u1), rowPitch, src->format, filter))
return E_FAIL;
}
// Produce one destination row from the two cached source rows
for (size_t x = 0; x < nwidth; ++x)
{
auto& toX = lfX[x];
BILINEAR_INTERPOLATE(target[x], toX, toY, row0, row1);
}
if (!_StoreScanlineLinear(pDest, dest->rowPitch, dest->format, target, nwidth, filter))
return E_FAIL;
pDest += dest->rowPitch;
}
// Next level reads from an image half this size (never below 1)
if (height > 1)
height >>= 1;
if (width > 1)
width >>= 1;
}
return S_OK;
}
//--- 2D Cubic Filter ---
// Fills mip levels 1..levels-1 of array item 'item' using a cubic filter.
// Each level is computed from the level directly above it; level 0 must already be
// present in 'mipChain' (see Setup2DMips).
//
//   levels   : total number of mip levels (asserted > 1)
//   filter   : TEX_FILTER flags; WRAP_U/V and MIRROR_U/V are passed to the
//              filter-table builder, and the flags are forwarded to the scanline
//              load/store helpers
//   mipChain : chain whose image pixel memory is read and written
//   item     : array item index within the chain
//
// Returns E_INVALIDARG when the chain has no images, E_OUTOFMEMORY when scratch
// space cannot be allocated, E_POINTER when an image is missing, or E_FAIL when a
// scanline load/store fails.
HRESULT Generate2DMipsCubicFilter(size_t levels, DWORD filter, const ScratchImage& mipChain, size_t item)
{
if (!mipChain.GetImages())
return E_INVALIDARG;
// This assumes that the base image is already placed into the mipChain at the top level... (see Setup2DMips)
assert(levels > 1);
size_t width = mipChain.GetMetadata().width;
size_t height = mipChain.GetMetadata().height;
// Allocate temporary space (5 scanlines, plus X and Y filters)
ScopedAlignedArrayXMVECTOR scanline(static_cast<XMVECTOR*>(_aligned_malloc((sizeof(XMVECTOR)*width * 5), 16)));
if (!scanline)
return E_OUTOFMEMORY;
// One allocation holds both filter tables: 'width' entries for X followed by 'height' entries for Y
std::unique_ptr<CubicFilter[]> cf(new (std::nothrow) CubicFilter[width + height]);
if (!cf)
return E_OUTOFMEMORY;
CubicFilter* cfX = cf.get();
CubicFilter* cfY = cf.get() + width;
// Scratch layout: output row, then four cached source rows
XMVECTOR* target = scanline.get();
XMVECTOR* row0 = target + width;
XMVECTOR* row1 = target + width * 2;
XMVECTOR* row2 = target + width * 3;
XMVECTOR* row3 = target + width * 4;
// Resize base image to each target mip level
for (size_t level = 1; level < levels; ++level)
{
// 2D cubic filter
const Image* src = mipChain.GetImage(level - 1, item, 0);
const Image* dest = mipChain.GetImage(level, item, 0);
if (!src || !dest)
return E_POINTER;
const uint8_t* pSrc = src->pixels;
uint8_t* pDest = dest->pixels;
size_t rowPitch = src->rowPitch;
// Rebuild the X and Y filter tables for this level's source -> destination sizes
size_t nwidth = (width > 1) ? (width >> 1) : 1;
_CreateCubicFilter(width, nwidth, (filter & TEX_FILTER_WRAP_U) != 0, (filter & TEX_FILTER_MIRROR_U) != 0, cfX);
size_t nheight = (height > 1) ? (height >> 1) : 1;
_CreateCubicFilter(height, nheight, (filter & TEX_FILTER_WRAP_V) != 0, (filter & TEX_FILTER_MIRROR_V) != 0, cfY);
#ifdef _DEBUG
memset(row0, 0xCD, sizeof(XMVECTOR)*width);
memset(row1, 0xDD, sizeof(XMVECTOR)*width);
memset(row2, 0xED, sizeof(XMVECTOR)*width);
memset(row3, 0xFD, sizeof(XMVECTOR)*width);
#endif
// u0..u3 track which source rows are currently held in row0..row3 (size_t(-1) = none).
// For each slot, a needed row that is already cached in a later slot is obtained by
// swapping buffers; otherwise it is loaded from the source image.
size_t u0 = size_t(-1);
size_t u1 = size_t(-1);
size_t u2 = size_t(-1);
size_t u3 = size_t(-1);
for (size_t y = 0; y < nheight; ++y)
{
auto& toY = cfY[y];
// Scanline 1
if (toY.u0 != u0)
{
if (toY.u0 != u1 && toY.u0 != u2 && toY.u0 != u3)
{
u0 = toY.u0;
if (!_LoadScanlineLinear(row0, width, pSrc + (rowPitch * u0), rowPitch, src->format, filter))
return E_FAIL;
}
else if (toY.u0 == u1)
{
u0 = u1;
u1 = size_t(-1);
std::swap(row0, row1);
}
else if (toY.u0 == u2)
{
u0 = u2;
u2 = size_t(-1);
std::swap(row0, row2);
}
else if (toY.u0 == u3)
{
u0 = u3;
u3 = size_t(-1);
std::swap(row0, row3);
}
}
// Scanline 2
if (toY.u1 != u1)
{
if (toY.u1 != u2 && toY.u1 != u3)
{
u1 = toY.u1;
if (!_LoadScanlineLinear(row1, width, pSrc + (rowPitch * u1), rowPitch, src->format, filter))
return E_FAIL;
}
else if (toY.u1 == u2)
{
u1 = u2;
u2 = size_t(-1);
std::swap(row1, row2);
}
else if (toY.u1 == u3)
{
u1 = u3;
u3 = size_t(-1);
std::swap(row1, row3);
}
}
// Scanline 3
if (toY.u2 != u2)
{
if (toY.u2 != u3)
{
u2 = toY.u2;
if (!_LoadScanlineLinear(row2, width, pSrc + (rowPitch * u2), rowPitch, src->format, filter))
return E_FAIL;
}
else
{
u2 = u3;
u3 = size_t(-1);
std::swap(row2, row3);
}
}
// Scanline 4
if (toY.u3 != u3)
{
u3 = toY.u3;
if (!_LoadScanlineLinear(row3, width, pSrc + (rowPitch * u3), rowPitch, src->format, filter))
return E_FAIL;
}
// Interpolate horizontally within each of the four rows (C0..C3), then
// vertically across those four results to get the destination texel.
for (size_t x = 0; x < nwidth; ++x)
{
auto& toX = cfX[x];
XMVECTOR C0, C1, C2, C3;
CUBIC_INTERPOLATE(C0, toX.x, row0[toX.u0], row0[toX.u1], row0[toX.u2], row0[toX.u3]);
CUBIC_INTERPOLATE(C1, toX.x, row1[toX.u0], row1[toX.u1], row1[toX.u2], row1[toX.u3]);
CUBIC_INTERPOLATE(C2, toX.x, row2[toX.u0], row2[toX.u1], row2[toX.u2], row2[toX.u3]);
CUBIC_INTERPOLATE(C3, toX.x, row3[toX.u0], row3[toX.u1], row3[toX.u2], row3[toX.u3]);
CUBIC_INTERPOLATE(target[x], toY.x, C0, C1, C2, C3);
}
if (!_StoreScanlineLinear(pDest, dest->rowPitch, dest->format, target, nwidth, filter))
return E_FAIL;
pDest += dest->rowPitch;
}
// Next level reads from an image half this size (never below 1)
if (height > 1)
height >>= 1;
if (width > 1)
width >>= 1;
}
return S_OK;
}
//--- 2D Triangle Filter ---
// Fills mip levels 1..levels-1 of array item 'item' using a triangle filter.
// Each level is computed from the level directly above it; level 0 must already be
// present in 'mipChain' (see Setup2DMips).
//
// Unlike the other filters, this one iterates over SOURCE rows: every source texel
// is scattered, weighted, into per-destination-row accumulation buffers. A
// destination row is written out once all source rows contributing to it have
// been processed.
//
//   levels   : total number of mip levels (asserted > 1)
//   filter   : TEX_FILTER flags; WRAP_U / WRAP_V are passed to the filter builder,
//              and the flags are forwarded to the scanline load/store helpers
//   mipChain : chain whose image pixel memory is read and written
//   item     : array item index within the chain
//
// Returns E_INVALIDARG when the chain has no images, E_OUTOFMEMORY on allocation
// failure, E_POINTER for a missing image or accumulation buffer, E_FAIL when a
// scanline load/store fails or the filter would read past the source image, or the
// failing HRESULT from building a filter.
HRESULT Generate2DMipsTriangleFilter(size_t levels, DWORD filter, const ScratchImage& mipChain, size_t item)
{
if (!mipChain.GetImages())
return E_INVALIDARG;
using namespace TriangleFilter;
// This assumes that the base image is already placed into the mipChain at the top level... (see Setup2DMips)
assert(levels > 1);
size_t width = mipChain.GetMetadata().width;
size_t height = mipChain.GetMetadata().height;
// Allocate initial temporary space (1 scanline, accumulation rows, plus X and Y filters)
ScopedAlignedArrayXMVECTOR scanline(static_cast<XMVECTOR*>(_aligned_malloc(sizeof(XMVECTOR) * width, 16)));
if (!scanline)
return E_OUTOFMEMORY;
// One TriangleRow per row of the base image; indexed below by destination row
std::unique_ptr<TriangleRow[]> rowActive(new (std::nothrow) TriangleRow[height]);
if (!rowActive)
return E_OUTOFMEMORY;
// Singly-linked list (via TriangleRow::next) of finished rows whose scanline buffers can be reused
TriangleRow * rowFree = nullptr;
// Filters are passed to _Create on every level (created or refreshed there)
std::unique_ptr<Filter> tfX, tfY;
XMVECTOR* row = scanline.get();
// Resize base image to each target mip level
for (size_t level = 1; level < levels; ++level)
{
// 2D triangle filter
const Image* src = mipChain.GetImage(level - 1, item, 0);
const Image* dest = mipChain.GetImage(level, item, 0);
if (!src || !dest)
return E_POINTER;
const uint8_t* pSrc = src->pixels;
size_t rowPitch = src->rowPitch;
// One-past-the-end of the source rows; used as a read guard while filtering
const uint8_t* pEndSrc = pSrc + rowPitch * height;
uint8_t* pDest = dest->pixels;
size_t nwidth = (width > 1) ? (width >> 1) : 1;
HRESULT hr = _Create(width, nwidth, (filter & TEX_FILTER_WRAP_U) != 0, tfX);
if (FAILED(hr))
return hr;
size_t nheight = (height > 1) ? (height >> 1) : 1;
hr = _Create(height, nheight, (filter & TEX_FILTER_WRAP_V) != 0, tfY);
if (FAILED(hr))
return hr;
#ifdef _DEBUG
memset(row, 0xCD, sizeof(XMVECTOR)*width);
#endif
// The filter is a packed sequence of variable-sized FilterFrom records; these mark the end of each sequence
auto xFromEnd = reinterpret_cast<const FilterFrom*>(reinterpret_cast<const uint8_t*>(tfX.get()) + tfX->sizeInBytes);
auto yFromEnd = reinterpret_cast<const FilterFrom*>(reinterpret_cast<const uint8_t*>(tfY.get()) + tfY->sizeInBytes);
// Count times rows get written (and clear out any leftover accumulation rows from last miplevel)
for (FilterFrom* yFrom = tfY->from; yFrom < yFromEnd; )
{
for (size_t j = 0; j < yFrom->count; ++j)
{
size_t v = yFrom->to[j].u;
assert(v < nheight);
TriangleRow* rowAcc = &rowActive[v];
++rowAcc->remaining;
if (rowAcc->scanline)
{
memset(rowAcc->scanline.get(), 0, sizeof(XMVECTOR) * nwidth);
}
}
// Advance to the next variable-sized record
yFrom = reinterpret_cast<FilterFrom*>(reinterpret_cast<uint8_t*>(yFrom) + yFrom->sizeInBytes);
}
// Filter image
// Each iteration of this loop consumes one source row (pSrc advances by rowPitch)
for (FilterFrom* yFrom = tfY->from; yFrom < yFromEnd; )
{
// Create accumulation rows as needed
for (size_t j = 0; j < yFrom->count; ++j)
{
size_t v = yFrom->to[j].u;
assert(v < nheight);
TriangleRow* rowAcc = &rowActive[v];
if (!rowAcc->scanline)
{
if (rowFree)
{
// Steal and reuse scanline from 'free row' list
// (it will always be at least as wide as nwidth due to loop descending order)
assert(rowFree->scanline != nullptr);
rowAcc->scanline.reset(rowFree->scanline.release());
rowFree = rowFree->next;
}
else
{
rowAcc->scanline.reset(static_cast<XMVECTOR*>(_aligned_malloc(sizeof(XMVECTOR) * nwidth, 16)));
if (!rowAcc->scanline)
return E_OUTOFMEMORY;
}
memset(rowAcc->scanline.get(), 0, sizeof(XMVECTOR) * nwidth);
}
}
// Load source scanline
if ((pSrc + rowPitch) > pEndSrc)
return E_FAIL;
if (!_LoadScanlineLinear(row, width, pSrc, rowPitch, src->format, filter))
return E_FAIL;
pSrc += rowPitch;
// Process row
// 'x' is the source column, advancing in step with the X filter records
size_t x = 0;
for (FilterFrom* xFrom = tfX->from; xFrom < xFromEnd; ++x)
{
for (size_t j = 0; j < yFrom->count; ++j)
{
size_t v = yFrom->to[j].u;
assert(v < nheight);
float yweight = yFrom->to[j].weight;
XMVECTOR* accPtr = rowActive[v].scanline.get();
if (!accPtr)
return E_POINTER;
for (size_t k = 0; k < xFrom->count; ++k)
{
size_t u = xFrom->to[k].u;
assert(u < nwidth);
// Accumulate source texel 'x' into destination (u, v) with the combined X*Y weight
XMVECTOR weight = XMVectorReplicate(yweight * xFrom->to[k].weight);
assert(x < width);
accPtr[u] = XMVectorMultiplyAdd(row[x], weight, accPtr[u]);
}
}
xFrom = reinterpret_cast<FilterFrom*>(reinterpret_cast<uint8_t*>(xFrom) + xFrom->sizeInBytes);
}
// Write completed accumulation rows
for (size_t j = 0; j < yFrom->count; ++j)
{
size_t v = yFrom->to[j].u;
assert(v < nheight);
TriangleRow* rowAcc = &rowActive[v];
assert(rowAcc->remaining > 0);
--rowAcc->remaining;
// A row is complete once every source row that contributes to it has been processed
if (!rowAcc->remaining)
{
XMVECTOR* pAccSrc = rowAcc->scanline.get();
if (!pAccSrc)
return E_POINTER;
switch (dest->format)
{
case DXGI_FORMAT_R10G10B10A2_UNORM:
case DXGI_FORMAT_R10G10B10A2_UINT:
{
// Need to slightly bias results for floating-point error accumulation which can
// be visible with harshly quantized values
static const XMVECTORF32 Bias = { { { 0.f, 0.f, 0.f, 0.1f } } };
XMVECTOR* ptr = pAccSrc;
for (size_t i = 0; i < dest->width; ++i, ++ptr)
{
*ptr = XMVectorAdd(*ptr, Bias);
}
}
break;
default:
break;
}
// This performs any required clamping
if (!_StoreScanlineLinear(pDest + (dest->rowPitch * v), dest->rowPitch, dest->format, pAccSrc, dest->width, filter))
return E_FAIL;
// Put row on freelist to reuse its allocated scanline
rowAcc->next = rowFree;
rowFree = rowAcc;
}
}
yFrom = reinterpret_cast<FilterFrom*>(reinterpret_cast<uint8_t*>(yFrom) + yFrom->sizeInBytes);
}
// Next level reads from an image half this size (never below 1)
if (height > 1)
height >>= 1;
if (width > 1)
width >>= 1;
}
return S_OK;
}
//-------------------------------------------------------------------------------------
// Generate volume mip-map helpers
//-------------------------------------------------------------------------------------
// Initializes 'mipChain' as a volume texture sized from the first base image and
// copies each of the 'depth' base slices into mip level 0.
// Returns E_INVALIDARG for a null/empty input array, the failure code of
// ScratchImage::Initialize3D, or E_POINTER (after releasing the chain) when a
// destination slice or its pixel buffer is missing.
HRESULT Setup3DMips(
    _In_reads_(depth) const Image* baseImages,
    size_t depth,
    size_t levels,
    _Out_ ScratchImage& mipChain)
{
    if (!baseImages || !depth)
        return E_INVALIDARG;

    assert(levels > 1);

    const size_t width = baseImages[0].width;
    const size_t height = baseImages[0].height;

    const HRESULT hr = mipChain.Initialize3D(baseImages[0].format, width, height, depth, levels);
    if (FAILED(hr))
        return hr;

    // Copy base images to top slice
    for (size_t z = 0; z < depth; ++z)
    {
        const Image& baseSlice = baseImages[z];

        const Image* top = mipChain.GetImage(0, 0, z);
        if (!top)
        {
            mipChain.Release();
            return E_POINTER;
        }

        assert(baseSlice.format == top->format);

        uint8_t* dstRow = top->pixels;
        if (!dstRow)
        {
            mipChain.Release();
            return E_POINTER;
        }

        const uint8_t* srcRow = baseSlice.pixels;
        const size_t srcPitch = baseSlice.rowPitch;
        const size_t dstPitch = top->rowPitch;

        // Pitches may differ; copy only the bytes both rows can hold.
        const size_t copyBytes = std::min<size_t>(dstPitch, srcPitch);

        for (size_t y = 0; y < height; ++y, srcRow += srcPitch, dstRow += dstPitch)
        {
            memcpy_s(dstRow, dstPitch, srcRow, copyBytes);
        }
    }

    return S_OK;
}
//--- 3D Point Filter ---
// Fills mip levels 1..levels-1 of a volume texture using point sampling.
// While more than one slice remains, destination slices are point-sampled along Z
// as well; once depth reaches 1 only the single remaining slice is resampled in 2D.
// Level 0 must already be present in 'mipChain' (see Setup3DMips).
// Returns E_INVALIDARG for zero depth or a chain with no images, E_OUTOFMEMORY when
// scratch space cannot be allocated, E_POINTER when an image is missing, or E_FAIL
// when a scanline load/store fails.
HRESULT Generate3DMipsPointFilter(size_t depth, size_t levels, const ScratchImage& mipChain)
{
    if (!depth || !mipChain.GetImages())
        return E_INVALIDARG;

    assert(levels > 1);

    size_t width = mipChain.GetMetadata().width;
    size_t height = mipChain.GetMetadata().height;

    // Scratch space for two scanlines: the output row followed by the loaded source row
    ScopedAlignedArrayXMVECTOR scratch(static_cast<XMVECTOR*>(_aligned_malloc((sizeof(XMVECTOR)*width * 2), 16)));
    if (!scratch)
        return E_OUTOFMEMORY;

    XMVECTOR* const outRow = scratch.get();
    XMVECTOR* const inRow = outRow + width;

    // Point-samples one source slice into one destination slice using the current
    // (by-reference) width/height of the source level.
    auto resampleSlice = [&](const Image* src, const Image* dest) -> HRESULT
    {
        if (!src || !dest)
            return E_POINTER;

        const uint8_t* srcBits = src->pixels;
        uint8_t* dstBits = dest->pixels;
        const size_t srcPitch = src->rowPitch;

        const size_t dstWidth = (width > 1) ? (width >> 1) : 1;
        const size_t dstHeight = (height > 1) ? (height >> 1) : 1;

        // 16.16 fixed-point step through the source per destination texel
        const size_t xStep = (width << 16) / dstWidth;
        const size_t yStep = (height << 16) / dstHeight;

        size_t loadedAt = size_t(-1);
        size_t srcY = 0;

        for (size_t y = 0; y < dstHeight; ++y, srcY += yStep)
        {
            // Reload only when the integer source row changes
            if ((srcY >> 16) != (loadedAt >> 16))
            {
                if (!_LoadScanline(inRow, width, srcBits + (srcPitch * (srcY >> 16)), srcPitch, src->format))
                    return E_FAIL;
                loadedAt = srcY;
            }

            size_t srcX = 0;
            for (size_t x = 0; x < dstWidth; ++x, srcX += xStep)
            {
                outRow[x] = inRow[srcX >> 16];
            }

            if (!_StoreScanline(dstBits, dest->rowPitch, dest->format, outRow, dstWidth))
                return E_FAIL;
            dstBits += dest->rowPitch;
        }

        return S_OK;
    };

    for (size_t level = 1; level < levels; ++level)
    {
#ifdef _DEBUG
        memset(inRow, 0xCD, sizeof(XMVECTOR)*width);
#endif

        if (depth > 1)
        {
            // 3D point filter: pick the source slice for each destination slice in 16.16 fixed point
            const size_t ndepth = depth >> 1;
            const size_t zStep = (depth << 16) / ndepth;

            size_t srcZ = 0;
            for (size_t slice = 0; slice < ndepth; ++slice, srcZ += zStep)
            {
                const Image* src = mipChain.GetImage(level - 1, 0, (srcZ >> 16));
                const Image* dest = mipChain.GetImage(level, 0, slice);

                const HRESULT hr = resampleSlice(src, dest);
                if (FAILED(hr))
                    return hr;
            }
        }
        else
        {
            // 2D point filter: a single slice remains
            const Image* src = mipChain.GetImage(level - 1, 0, 0);
            const Image* dest = mipChain.GetImage(level, 0, 0);

            const HRESULT hr = resampleSlice(src, dest);
            if (FAILED(hr))
                return hr;
        }

        if (height > 1)
            height >>= 1;
        if (width > 1)
            width >>= 1;
        if (depth > 1)
            depth >>= 1;
    }

    return S_OK;
}
//--- 3D Box Filter ---
HRESULT Generate3DMipsBoxFilter(size_t depth, size_t levels, DWORD filter, const ScratchImage& mipChain)
{
    // Builds mip levels 1..levels-1 of a volume texture in place by averaging
    // a 2x2x2 block of source texels (2x2 once depth has collapsed to 1) for
    // every destination texel.
    //
    // depth    - number of slices in the top level; must be a non-zero power of two
    // levels   - total number of mip levels in mipChain (asserted > 1)
    // filter   - TEX_FILTER_* flags forwarded to the scanline load/store helpers
    // mipChain - chain whose level 0 already holds the base slices; each level
    //            is produced from the level directly above it
    //
    // Returns E_INVALIDARG for a zero depth or an empty chain, E_FAIL if any
    // dimension is not a power of two or a scanline load/store fails,
    // E_OUTOFMEMORY if the scratch rows cannot be allocated, E_POINTER if an
    // expected image is missing, otherwise S_OK.
    if (!depth || !mipChain.GetImages())
        return E_INVALIDARG;

    // This assumes that the base images are already placed into the mipChain at the top level... (see _Setup3DMips)

    assert(levels > 1);

    size_t width = mipChain.GetMetadata().width;
    size_t height = mipChain.GetMetadata().height;

    if (!ispow2(width) || !ispow2(height) || !ispow2(depth))
        return E_FAIL;

    // Allocate temporary space (5 scanlines)
    ScopedAlignedArrayXMVECTOR scanline(static_cast<XMVECTOR*>(_aligned_malloc((sizeof(XMVECTOR)*width * 5), 16)));
    if (!scanline)
        return E_OUTOFMEMORY;

    // target: output row; urow0/urow1: two rows of the first source slice;
    // vrow0/vrow1: two rows of the second source slice.
    XMVECTOR* target = scanline.get();

    XMVECTOR* urow0 = target + width;
    XMVECTOR* urow1 = target + width * 2;
    XMVECTOR* vrow0 = target + width * 3;
    XMVECTOR* vrow1 = target + width * 4;

    // Resize base image to each target mip level
    for (size_t level = 1; level < levels; ++level)
    {
        if (height <= 1)
        {
            // Only one source row exists: make the "second row" alias the first
            urow1 = urow0;
            vrow1 = vrow0;
        }

        // The *2/*3 pointers address the right-hand neighbour texel of each row.
        // They are re-derived every level from the CURRENT row pointers so that
        // they follow the aliasing above; deriving them once up front left
        // urow3/vrow3 pointing into a row buffer that is never loaded when
        // height <= 1 but width > 1. When width <= 1 there is no neighbour,
        // so they alias the row itself.
        const size_t xstep = (width > 1) ? 1 : 0;
        const XMVECTOR* urow2 = urow0 + xstep;
        const XMVECTOR* urow3 = urow1 + xstep;
        const XMVECTOR* vrow2 = vrow0 + xstep;
        const XMVECTOR* vrow3 = vrow1 + xstep;

        const size_t nwidth = (width > 1) ? (width >> 1) : 1;
        const size_t nheight = (height > 1) ? (height >> 1) : 1;

        if (depth > 1)
        {
            // 3D box filter
            size_t ndepth = depth >> 1;

            for (size_t slice = 0; slice < ndepth; ++slice)
            {
                // Pair of source slices feeding this destination slice (clamped to the last slice)
                size_t slicea = std::min<size_t>(slice * 2, depth - 1);
                size_t sliceb = std::min<size_t>(slicea + 1, depth - 1);

                const Image* srca = mipChain.GetImage(level - 1, 0, slicea);
                const Image* srcb = mipChain.GetImage(level - 1, 0, sliceb);
                const Image* dest = mipChain.GetImage(level, 0, slice);

                if (!srca || !srcb || !dest)
                    return E_POINTER;

                const uint8_t* pSrc1 = srca->pixels;
                const uint8_t* pSrc2 = srcb->pixels;
                uint8_t* pDest = dest->pixels;

                size_t aRowPitch = srca->rowPitch;
                size_t bRowPitch = srcb->rowPitch;

                for (size_t y = 0; y < nheight; ++y)
                {
                    if (!_LoadScanlineLinear(urow0, width, pSrc1, aRowPitch, srca->format, filter))
                        return E_FAIL;
                    pSrc1 += aRowPitch;

                    // Skip the second row when it aliases the first (height <= 1)
                    if (urow0 != urow1)
                    {
                        if (!_LoadScanlineLinear(urow1, width, pSrc1, aRowPitch, srca->format, filter))
                            return E_FAIL;
                        pSrc1 += aRowPitch;
                    }

                    if (!_LoadScanlineLinear(vrow0, width, pSrc2, bRowPitch, srcb->format, filter))
                        return E_FAIL;
                    pSrc2 += bRowPitch;

                    if (vrow0 != vrow1)
                    {
                        if (!_LoadScanlineLinear(vrow1, width, pSrc2, bRowPitch, srcb->format, filter))
                            return E_FAIL;
                        pSrc2 += bRowPitch;
                    }

                    for (size_t x = 0; x < nwidth; ++x)
                    {
                        size_t x2 = x << 1;

                        AVERAGE8(target[x], urow0[x2], urow1[x2], urow2[x2], urow3[x2],
                            vrow0[x2], vrow1[x2], vrow2[x2], vrow3[x2]);
                    }

                    if (!_StoreScanlineLinear(pDest, dest->rowPitch, dest->format, target, nwidth, filter))
                        return E_FAIL;
                    pDest += dest->rowPitch;
                }
            }
        }
        else
        {
            // 2D box filter
            const Image* src = mipChain.GetImage(level - 1, 0, 0);
            const Image* dest = mipChain.GetImage(level, 0, 0);

            if (!src || !dest)
                return E_POINTER;

            const uint8_t* pSrc = src->pixels;
            uint8_t* pDest = dest->pixels;

            size_t rowPitch = src->rowPitch;

            for (size_t y = 0; y < nheight; ++y)
            {
                if (!_LoadScanlineLinear(urow0, width, pSrc, rowPitch, src->format, filter))
                    return E_FAIL;
                pSrc += rowPitch;

                // Skip the second row when it aliases the first (height <= 1)
                if (urow0 != urow1)
                {
                    if (!_LoadScanlineLinear(urow1, width, pSrc, rowPitch, src->format, filter))
                        return E_FAIL;
                    pSrc += rowPitch;
                }

                for (size_t x = 0; x < nwidth; ++x)
                {
                    size_t x2 = x << 1;

                    AVERAGE4(target[x], urow0[x2], urow1[x2], urow2[x2], urow3[x2]);
                }

                if (!_StoreScanlineLinear(pDest, dest->rowPitch, dest->format, target, nwidth, filter))
                    return E_FAIL;
                pDest += dest->rowPitch;
            }
        }

        // Dimensions of the level just written become the next source size
        if (height > 1)
            height >>= 1;

        if (width > 1)
            width >>= 1;

        if (depth > 1)
            depth >>= 1;
    }

    return S_OK;
}
//--- 3D Linear Filter ---
// Builds mip levels 1..levels-1 of a volume texture in place using linear
// interpolation: trilinear while depth > 1, bilinear once only one slice is left.
//
// depth    - number of slices in the top level; must be non-zero
// levels   - total number of mip levels in mipChain (asserted > 1)
// filter   - TEX_FILTER_* flags; the WRAP_U/V/W bits select wrapping per axis,
//            and the whole value is forwarded to the scanline load/store helpers
// mipChain - chain whose level 0 already holds the base slices; each level is
//            produced from the level directly above it
//
// Returns E_INVALIDARG for a zero depth or an empty chain, E_OUTOFMEMORY if a
// scratch allocation fails, E_POINTER if an expected image is missing, E_FAIL
// if a scanline load/store fails, otherwise S_OK.
HRESULT Generate3DMipsLinearFilter(size_t depth, size_t levels, DWORD filter, const ScratchImage& mipChain)
{
if (!depth || !mipChain.GetImages())
return E_INVALIDARG;
// This assumes that the base images are already placed into the mipChain at the top level... (see _Setup3DMips)
assert(levels > 1);
size_t width = mipChain.GetMetadata().width;
size_t height = mipChain.GetMetadata().height;
// Allocate temporary space (5 scanlines, plus X/Y/Z filters)
ScopedAlignedArrayXMVECTOR scanline(static_cast<XMVECTOR*>(_aligned_malloc((sizeof(XMVECTOR)*width * 5), 16)));
if (!scanline)
return E_OUTOFMEMORY;
// One filter table, carved into X, Y and Z segments sized for the top level
std::unique_ptr<LinearFilter[]> lf(new (std::nothrow) LinearFilter[width + height + depth]);
if (!lf)
return E_OUTOFMEMORY;
LinearFilter* lfX = lf.get();
LinearFilter* lfY = lf.get() + width;
LinearFilter* lfZ = lf.get() + width + height;
// target: output row; urow0/urow1: two cached rows of the first source slice;
// vrow0/vrow1: the same two rows of the second source slice
XMVECTOR* target = scanline.get();
XMVECTOR* urow0 = target + width;
XMVECTOR* urow1 = target + width * 2;
XMVECTOR* vrow0 = target + width * 3;
XMVECTOR* vrow1 = target + width * 4;
// Resize base image to each target mip level
for (size_t level = 1; level < levels; ++level)
{
// NOTE(review): _CreateLinearFilter presumably fills one entry per destination
// texel with the two source indices (u0, u1) and blend weights - confirm in filters.h
size_t nwidth = (width > 1) ? (width >> 1) : 1;
_CreateLinearFilter(width, nwidth, (filter & TEX_FILTER_WRAP_U) != 0, lfX);
size_t nheight = (height > 1) ? (height >> 1) : 1;
_CreateLinearFilter(height, nheight, (filter & TEX_FILTER_WRAP_V) != 0, lfY);
#ifdef _DEBUG
memset(urow0, 0xCD, sizeof(XMVECTOR)*width);
memset(urow1, 0xDD, sizeof(XMVECTOR)*width);
memset(vrow0, 0xED, sizeof(XMVECTOR)*width);
memset(vrow1, 0xFD, sizeof(XMVECTOR)*width);
#endif
if (depth > 1)
{
// 3D linear filter
size_t ndepth = depth >> 1;
_CreateLinearFilter(depth, ndepth, (filter & TEX_FILTER_WRAP_W) != 0, lfZ);
for (size_t slice = 0; slice < ndepth; ++slice)
{
// The Z filter entry names the two source slices blended into this destination slice
auto& toZ = lfZ[slice];
const Image* srca = mipChain.GetImage(level - 1, 0, toZ.u0);
const Image* srcb = mipChain.GetImage(level - 1, 0, toZ.u1);
if (!srca || !srcb)
return E_POINTER;
// u0/u1 record which source row indices are currently held in
// urow0/vrow0 and urow1/vrow1; size_t(-1) means "nothing loaded"
size_t u0 = size_t(-1);
size_t u1 = size_t(-1);
const Image* dest = mipChain.GetImage(level, 0, slice);
if (!dest)
return E_POINTER;
uint8_t* pDest = dest->pixels;
for (size_t y = 0; y < nheight; ++y)
{
auto& toY = lfY[y];
if (toY.u0 != u0)
{
if (toY.u0 != u1)
{
// First row is not cached anywhere: load it from both source slices
u0 = toY.u0;
if (!_LoadScanlineLinear(urow0, width, srca->pixels + (srca->rowPitch * u0), srca->rowPitch, srca->format, filter)
|| !_LoadScanlineLinear(vrow0, width, srcb->pixels + (srcb->rowPitch * u0), srcb->rowPitch, srcb->format, filter))
return E_FAIL;
}
else
{
// First row is already held in the second-row buffers: swap the
// buffer pointers instead of reloading, and mark slot 1 as empty
u0 = u1;
u1 = size_t(-1);
std::swap(urow0, urow1);
std::swap(vrow0, vrow1);
}
}
if (toY.u1 != u1)
{
u1 = toY.u1;
if (!_LoadScanlineLinear(urow1, width, srca->pixels + (srca->rowPitch * u1), srca->rowPitch, srca->format, filter)
|| !_LoadScanlineLinear(vrow1, width, srcb->pixels + (srcb->rowPitch * u1), srcb->rowPitch, srcb->format, filter))
return E_FAIL;
}
for (size_t x = 0; x < nwidth; ++x)
{
auto& toX = lfX[x];
TRILINEAR_INTERPOLATE(target[x], toX, toY, toZ, urow0, urow1, vrow0, vrow1);
}
if (!_StoreScanlineLinear(pDest, dest->rowPitch, dest->format, target, nwidth, filter))
return E_FAIL;
pDest += dest->rowPitch;
}
}
}
else
{
// 2D linear filter
const Image* src = mipChain.GetImage(level - 1, 0, 0);
const Image* dest = mipChain.GetImage(level, 0, 0);
if (!src || !dest)
return E_POINTER;
const uint8_t* pSrc = src->pixels;
uint8_t* pDest = dest->pixels;
size_t rowPitch = src->rowPitch;
// Same two-row cache as the 3D path, but only urow0/urow1 are used
size_t u0 = size_t(-1);
size_t u1 = size_t(-1);
for (size_t y = 0; y < nheight; ++y)
{
auto& toY = lfY[y];
if (toY.u0 != u0)
{
if (toY.u0 != u1)
{
u0 = toY.u0;
if (!_LoadScanlineLinear(urow0, width, pSrc + (rowPitch * u0), rowPitch, src->format, filter))
return E_FAIL;
}
else
{
// Needed row is already in urow1: swap instead of reloading
u0 = u1;
u1 = size_t(-1);
std::swap(urow0, urow1);
}
}
if (toY.u1 != u1)
{
u1 = toY.u1;
if (!_LoadScanlineLinear(urow1, width, pSrc + (rowPitch * u1), rowPitch, src->format, filter))
return E_FAIL;
}
for (size_t x = 0; x < nwidth; ++x)
{
auto& toX = lfX[x];
BILINEAR_INTERPOLATE(target[x], toX, toY, urow0, urow1);
}
if (!_StoreScanlineLinear(pDest, dest->rowPitch, dest->format, target, nwidth, filter))
return E_FAIL;
pDest += dest->rowPitch;
}
}
// Dimensions of the level just written become the next source size
if (height > 1)
height >>= 1;
if (width > 1)
width >>= 1;
if (depth > 1)
depth >>= 1;
}
return S_OK;
}
//--- 3D Cubic Filter ---
// Builds mip levels 1..levels-1 of a volume texture in place using cubic
// interpolation: along X, Y and Z while depth > 1, along X and Y once only one
// slice is left.
//
// depth    - number of slices in the top level; must be non-zero
// levels   - total number of mip levels in mipChain (asserted > 1)
// filter   - TEX_FILTER_* flags; the WRAP_* and MIRROR_* bits for U/V/W are
//            passed to the per-axis filter setup, and the whole value is
//            forwarded to the scanline load/store helpers
// mipChain - chain whose level 0 already holds the base slices; each level is
//            produced from the level directly above it
//
// Returns E_INVALIDARG for a zero depth or an empty chain, E_OUTOFMEMORY if a
// scratch allocation fails, E_POINTER if an expected image is missing, E_FAIL
// if a scanline load/store fails, otherwise S_OK.
HRESULT Generate3DMipsCubicFilter(size_t depth, size_t levels, DWORD filter, const ScratchImage& mipChain)
{
if (!depth || !mipChain.GetImages())
return E_INVALIDARG;
// This assumes that the base images are already placed into the mipChain at the top level... (see _Setup3DMips)
assert(levels > 1);
size_t width = mipChain.GetMetadata().width;
size_t height = mipChain.GetMetadata().height;
// Allocate temporary space (17 scanlines, plus X/Y/Z filters)
ScopedAlignedArrayXMVECTOR scanline(static_cast<XMVECTOR*>(_aligned_malloc((sizeof(XMVECTOR)*width * 17), 16)));
if (!scanline)
return E_OUTOFMEMORY;
// One filter table, carved into X, Y and Z segments sized for the top level
std::unique_ptr<CubicFilter[]> cf(new (std::nothrow) CubicFilter[width + height + depth]);
if (!cf)
return E_OUTOFMEMORY;
CubicFilter* cfX = cf.get();
CubicFilter* cfY = cf.get() + width;
CubicFilter* cfZ = cf.get() + width + height;
// Scratch layout: the first row is the output row; the remaining 16 rows are
// handed out as urow/vrow/srow/trow[0..3]. In the 3D path the array name selects
// the cached source row (urow = u0, vrow = u1, srow = u2, trow = u3) and the index
// selects the source slice (0..3 = srca..srcd). The 2D path only uses index 0.
XMVECTOR* target = scanline.get();
XMVECTOR* urow[4];
XMVECTOR* vrow[4];
XMVECTOR* srow[4];
XMVECTOR* trow[4];
XMVECTOR *ptr = scanline.get() + width;
for (size_t j = 0; j < 4; ++j)
{
urow[j] = ptr; ptr += width;
vrow[j] = ptr; ptr += width;
srow[j] = ptr; ptr += width;
trow[j] = ptr; ptr += width;
}
// Resize base image to each target mip level
for (size_t level = 1; level < levels; ++level)
{
// NOTE(review): _CreateCubicFilter presumably fills one entry per destination
// texel with four source indices (u0..u3) and a fractional position x - confirm in filters.h
size_t nwidth = (width > 1) ? (width >> 1) : 1;
_CreateCubicFilter(width, nwidth, (filter & TEX_FILTER_WRAP_U) != 0, (filter & TEX_FILTER_MIRROR_U) != 0, cfX);
size_t nheight = (height > 1) ? (height >> 1) : 1;
_CreateCubicFilter(height, nheight, (filter & TEX_FILTER_WRAP_V) != 0, (filter & TEX_FILTER_MIRROR_V) != 0, cfY);
#ifdef _DEBUG
for (size_t j = 0; j < 4; ++j)
{
memset(urow[j], 0xCD, sizeof(XMVECTOR)*width);
memset(vrow[j], 0xDD, sizeof(XMVECTOR)*width);
memset(srow[j], 0xED, sizeof(XMVECTOR)*width);
memset(trow[j], 0xFD, sizeof(XMVECTOR)*width);
}
#endif
if (depth > 1)
{
// 3D cubic filter
size_t ndepth = depth >> 1;
_CreateCubicFilter(depth, ndepth, (filter & TEX_FILTER_WRAP_W) != 0, (filter & TEX_FILTER_MIRROR_W) != 0, cfZ);
for (size_t slice = 0; slice < ndepth; ++slice)
{
// The Z filter entry names the four source slices blended into this destination slice
auto& toZ = cfZ[slice];
const Image* srca = mipChain.GetImage(level - 1, 0, toZ.u0);
const Image* srcb = mipChain.GetImage(level - 1, 0, toZ.u1);
const Image* srcc = mipChain.GetImage(level - 1, 0, toZ.u2);
const Image* srcd = mipChain.GetImage(level - 1, 0, toZ.u3);
if (!srca || !srcb || !srcc || !srcd)
return E_POINTER;
// u0..u3 record which source row index is currently cached in
// urow/vrow/srow/trow respectively; size_t(-1) means "nothing loaded"
size_t u0 = size_t(-1);
size_t u1 = size_t(-1);
size_t u2 = size_t(-1);
size_t u3 = size_t(-1);
const Image* dest = mipChain.GetImage(level, 0, slice);
if (!dest)
return E_POINTER;
uint8_t* pDest = dest->pixels;
for (size_t y = 0; y < nheight; ++y)
{
auto& toY = cfY[y];
// For each of the four needed rows: if it is not in its own slot, either
// load it from all four slices, or - when a later slot already holds it -
// swap the buffer pointers and mark that later slot as empty.
// Scanline 1
if (toY.u0 != u0)
{
if (toY.u0 != u1 && toY.u0 != u2 && toY.u0 != u3)
{
u0 = toY.u0;
if (!_LoadScanlineLinear(urow[0], width, srca->pixels + (srca->rowPitch * u0), srca->rowPitch, srca->format, filter)
|| !_LoadScanlineLinear(urow[1], width, srcb->pixels + (srcb->rowPitch * u0), srcb->rowPitch, srcb->format, filter)
|| !_LoadScanlineLinear(urow[2], width, srcc->pixels + (srcc->rowPitch * u0), srcc->rowPitch, srcc->format, filter)
|| !_LoadScanlineLinear(urow[3], width, srcd->pixels + (srcd->rowPitch * u0), srcd->rowPitch, srcd->format, filter))
return E_FAIL;
}
else if (toY.u0 == u1)
{
u0 = u1;
u1 = size_t(-1);
std::swap(urow[0], vrow[0]);
std::swap(urow[1], vrow[1]);
std::swap(urow[2], vrow[2]);
std::swap(urow[3], vrow[3]);
}
else if (toY.u0 == u2)
{
u0 = u2;
u2 = size_t(-1);
std::swap(urow[0], srow[0]);
std::swap(urow[1], srow[1]);
std::swap(urow[2], srow[2]);
std::swap(urow[3], srow[3]);
}
else if (toY.u0 == u3)
{
u0 = u3;
u3 = size_t(-1);
std::swap(urow[0], trow[0]);
std::swap(urow[1], trow[1]);
std::swap(urow[2], trow[2]);
std::swap(urow[3], trow[3]);
}
}
// Scanline 2
if (toY.u1 != u1)
{
if (toY.u1 != u2 && toY.u1 != u3)
{
u1 = toY.u1;
if (!_LoadScanlineLinear(vrow[0], width, srca->pixels + (srca->rowPitch * u1), srca->rowPitch, srca->format, filter)
|| !_LoadScanlineLinear(vrow[1], width, srcb->pixels + (srcb->rowPitch * u1), srcb->rowPitch, srcb->format, filter)
|| !_LoadScanlineLinear(vrow[2], width, srcc->pixels + (srcc->rowPitch * u1), srcc->rowPitch, srcc->format, filter)
|| !_LoadScanlineLinear(vrow[3], width, srcd->pixels + (srcd->rowPitch * u1), srcd->rowPitch, srcd->format, filter))
return E_FAIL;
}
else if (toY.u1 == u2)
{
u1 = u2;
u2 = size_t(-1);
std::swap(vrow[0], srow[0]);
std::swap(vrow[1], srow[1]);
std::swap(vrow[2], srow[2]);
std::swap(vrow[3], srow[3]);
}
else if (toY.u1 == u3)
{
u1 = u3;
u3 = size_t(-1);
std::swap(vrow[0], trow[0]);
std::swap(vrow[1], trow[1]);
std::swap(vrow[2], trow[2]);
std::swap(vrow[3], trow[3]);
}
}
// Scanline 3
if (toY.u2 != u2)
{
if (toY.u2 != u3)
{
u2 = toY.u2;
if (!_LoadScanlineLinear(srow[0], width, srca->pixels + (srca->rowPitch * u2), srca->rowPitch, srca->format, filter)
|| !_LoadScanlineLinear(srow[1], width, srcb->pixels + (srcb->rowPitch * u2), srcb->rowPitch, srcb->format, filter)
|| !_LoadScanlineLinear(srow[2], width, srcc->pixels + (srcc->rowPitch * u2), srcc->rowPitch, srcc->format, filter)
|| !_LoadScanlineLinear(srow[3], width, srcd->pixels + (srcd->rowPitch * u2), srcd->rowPitch, srcd->format, filter))
return E_FAIL;
}
else
{
u2 = u3;
u3 = size_t(-1);
std::swap(srow[0], trow[0]);
std::swap(srow[1], trow[1]);
std::swap(srow[2], trow[2]);
std::swap(srow[3], trow[3]);
}
}
// Scanline 4
if (toY.u3 != u3)
{
u3 = toY.u3;
if (!_LoadScanlineLinear(trow[0], width, srca->pixels + (srca->rowPitch * u3), srca->rowPitch, srca->format, filter)
|| !_LoadScanlineLinear(trow[1], width, srcb->pixels + (srcb->rowPitch * u3), srcb->rowPitch, srcb->format, filter)
|| !_LoadScanlineLinear(trow[2], width, srcc->pixels + (srcc->rowPitch * u3), srcc->rowPitch, srcc->format, filter)
|| !_LoadScanlineLinear(trow[3], width, srcd->pixels + (srcd->rowPitch * u3), srcd->rowPitch, srcd->format, filter))
return E_FAIL;
}
for (size_t x = 0; x < nwidth; ++x)
{
auto& toX = cfX[x];
// D[j] is the X-then-Y interpolated value for source slice j;
// the final interpolation combines the four slices along Z
XMVECTOR D[4];
for (size_t j = 0; j < 4; ++j)
{
XMVECTOR C0, C1, C2, C3;
CUBIC_INTERPOLATE(C0, toX.x, urow[j][toX.u0], urow[j][toX.u1], urow[j][toX.u2], urow[j][toX.u3]);
CUBIC_INTERPOLATE(C1, toX.x, vrow[j][toX.u0], vrow[j][toX.u1], vrow[j][toX.u2], vrow[j][toX.u3]);
CUBIC_INTERPOLATE(C2, toX.x, srow[j][toX.u0], srow[j][toX.u1], srow[j][toX.u2], srow[j][toX.u3]);
CUBIC_INTERPOLATE(C3, toX.x, trow[j][toX.u0], trow[j][toX.u1], trow[j][toX.u2], trow[j][toX.u3]);
CUBIC_INTERPOLATE(D[j], toY.x, C0, C1, C2, C3);
}
CUBIC_INTERPOLATE(target[x], toZ.x, D[0], D[1], D[2], D[3]);
}
if (!_StoreScanlineLinear(pDest, dest->rowPitch, dest->format, target, nwidth, filter))
return E_FAIL;
pDest += dest->rowPitch;
}
}
}
else
{
// 2D cubic filter
const Image* src = mipChain.GetImage(level - 1, 0, 0);
const Image* dest = mipChain.GetImage(level, 0, 0);
if (!src || !dest)
return E_POINTER;
const uint8_t* pSrc = src->pixels;
uint8_t* pDest = dest->pixels;
size_t rowPitch = src->rowPitch;
// Same four-row cache as the 3D path, using only index 0 of each row array
size_t u0 = size_t(-1);
size_t u1 = size_t(-1);
size_t u2 = size_t(-1);
size_t u3 = size_t(-1);
for (size_t y = 0; y < nheight; ++y)
{
auto& toY = cfY[y];
// Scanline 1
if (toY.u0 != u0)
{
if (toY.u0 != u1 && toY.u0 != u2 && toY.u0 != u3)
{
u0 = toY.u0;
if (!_LoadScanlineLinear(urow[0], width, pSrc + (rowPitch * u0), rowPitch, src->format, filter))
return E_FAIL;
}
else if (toY.u0 == u1)
{
u0 = u1;
u1 = size_t(-1);
std::swap(urow[0], vrow[0]);
}
else if (toY.u0 == u2)
{
u0 = u2;
u2 = size_t(-1);
std::swap(urow[0], srow[0]);
}
else if (toY.u0 == u3)
{
u0 = u3;
u3 = size_t(-1);
std::swap(urow[0], trow[0]);
}
}
// Scanline 2
if (toY.u1 != u1)
{
if (toY.u1 != u2 && toY.u1 != u3)
{
u1 = toY.u1;
if (!_LoadScanlineLinear(vrow[0], width, pSrc + (rowPitch * u1), rowPitch, src->format, filter))
return E_FAIL;
}
else if (toY.u1 == u2)
{
u1 = u2;
u2 = size_t(-1);
std::swap(vrow[0], srow[0]);
}
else if (toY.u1 == u3)
{
u1 = u3;
u3 = size_t(-1);
std::swap(vrow[0], trow[0]);
}
}
// Scanline 3
if (toY.u2 != u2)
{
if (toY.u2 != u3)
{
u2 = toY.u2;
if (!_LoadScanlineLinear(srow[0], width, pSrc + (rowPitch * u2), rowPitch, src->format, filter))
return E_FAIL;
}
else
{
u2 = u3;
u3 = size_t(-1);
std::swap(srow[0], trow[0]);
}
}
// Scanline 4
if (toY.u3 != u3)
{
u3 = toY.u3;
if (!_LoadScanlineLinear(trow[0], width, pSrc + (rowPitch * u3), rowPitch, src->format, filter))
return E_FAIL;
}
for (size_t x = 0; x < nwidth; ++x)
{
auto& toX = cfX[x];
// Interpolate along X within each of the four rows, then along Y
XMVECTOR C0, C1, C2, C3;
CUBIC_INTERPOLATE(C0, toX.x, urow[0][toX.u0], urow[0][toX.u1], urow[0][toX.u2], urow[0][toX.u3]);
CUBIC_INTERPOLATE(C1, toX.x, vrow[0][toX.u0], vrow[0][toX.u1], vrow[0][toX.u2], vrow[0][toX.u3]);
CUBIC_INTERPOLATE(C2, toX.x, srow[0][toX.u0], srow[0][toX.u1], srow[0][toX.u2], srow[0][toX.u3]);
CUBIC_INTERPOLATE(C3, toX.x, trow[0][toX.u0], trow[0][toX.u1], trow[0][toX.u2], trow[0][toX.u3]);
CUBIC_INTERPOLATE(target[x], toY.x, C0, C1, C2, C3);
}
if (!_StoreScanlineLinear(pDest, dest->rowPitch, dest->format, target, nwidth, filter))
return E_FAIL;
pDest += dest->rowPitch;
}
}
// Dimensions of the level just written become the next source size
if (height > 1)
height >>= 1;
if (width > 1)
width >>= 1;
if (depth > 1)
depth >>= 1;
}
return S_OK;
}
//--- 3D Triangle Filter ---
// Builds mip levels 1..levels-1 of a volume texture in place using the
// triangle filter. Unlike the other filters this one is source-driven: every
// source texel is visited once and its weighted value is added into each
// destination texel it contributes to, using per-slice accumulation buffers.
//
// depth    - number of slices in the top level; must be non-zero
// levels   - total number of mip levels in mipChain (asserted > 1)
// filter   - TEX_FILTER_* flags; the WRAP_U/V/W bits are passed to the per-axis
//            filter setup, and the whole value is forwarded to the scanline
//            load/store helpers
// mipChain - chain whose level 0 already holds the base slices; each level is
//            produced from the level directly above it
//
// Returns E_INVALIDARG for a zero depth or an empty chain, E_OUTOFMEMORY if an
// allocation fails, E_POINTER if an expected image or accumulation buffer is
// missing, E_FAIL if a scanline load/store fails or the Y filter would read
// past the source image, the failing HRESULT from filter creation, otherwise S_OK.
HRESULT Generate3DMipsTriangleFilter(size_t depth, size_t levels, DWORD filter, const ScratchImage& mipChain)
{
if (!depth || !mipChain.GetImages())
return E_INVALIDARG;
using namespace TriangleFilter;
// This assumes that the base images are already placed into the mipChain at the top level... (see _Setup3DMips)
assert(levels > 1);
size_t width = mipChain.GetMetadata().width;
size_t height = mipChain.GetMetadata().height;
// Allocate initial temporary space (1 scanline, accumulation rows, plus X/Y/Z filters)
ScopedAlignedArrayXMVECTOR scanline(static_cast<XMVECTOR*>(_aligned_malloc(sizeof(XMVECTOR) * width, 16)));
if (!scanline)
return E_OUTOFMEMORY;
// One bookkeeping entry per destination slice (indexed by destination slice number)
// NOTE(review): relies on TriangleRow default-initializing 'remaining' to 0 and
// 'scanline' to null - confirm in filters.h
std::unique_ptr<TriangleRow[]> sliceActive(new (std::nothrow) TriangleRow[depth]);
if (!sliceActive)
return E_OUTOFMEMORY;
// Head of a singly linked list (through TriangleRow::next) of entries whose
// accumulation buffer has been written out and can be reused
TriangleRow * sliceFree = nullptr;
std::unique_ptr<Filter> tfX, tfY, tfZ;
XMVECTOR* row = scanline.get();
// Resize base image to each target mip level
for (size_t level = 1; level < levels; ++level)
{
size_t nwidth = (width > 1) ? (width >> 1) : 1;
HRESULT hr = _Create(width, nwidth, (filter & TEX_FILTER_WRAP_U) != 0, tfX);
if (FAILED(hr))
return hr;
size_t nheight = (height > 1) ? (height >> 1) : 1;
hr = _Create(height, nheight, (filter & TEX_FILTER_WRAP_V) != 0, tfY);
if (FAILED(hr))
return hr;
size_t ndepth = (depth > 1) ? (depth >> 1) : 1;
hr = _Create(depth, ndepth, (filter & TEX_FILTER_WRAP_W) != 0, tfZ);
if (FAILED(hr))
return hr;
#ifdef _DEBUG
memset(row, 0xCD, sizeof(XMVECTOR)*width);
#endif
// Each filter is a packed run of variable-sized FilterFrom records; these are the
// one-past-the-end addresses used to terminate the walks below
auto xFromEnd = reinterpret_cast<const FilterFrom*>(reinterpret_cast<const uint8_t*>(tfX.get()) + tfX->sizeInBytes);
auto yFromEnd = reinterpret_cast<const FilterFrom*>(reinterpret_cast<const uint8_t*>(tfY.get()) + tfY->sizeInBytes);
auto zFromEnd = reinterpret_cast<const FilterFrom*>(reinterpret_cast<const uint8_t*>(tfZ.get()) + tfZ->sizeInBytes);
// Count times slices get written (and clear out any leftover accumulation slices from last miplevel)
for (FilterFrom* zFrom = tfZ->from; zFrom < zFromEnd; )
{
for (size_t j = 0; j < zFrom->count; ++j)
{
size_t w = zFrom->to[j].u;
assert(w < ndepth);
TriangleRow* sliceAcc = &sliceActive[w];
++sliceAcc->remaining;
if (sliceAcc->scanline)
{
memset(sliceAcc->scanline.get(), 0, sizeof(XMVECTOR) * nwidth * nheight);
}
}
// Advance to the next record using its own byte size
zFrom = reinterpret_cast<FilterFrom*>(reinterpret_cast<uint8_t*>(zFrom) + zFrom->sizeInBytes);
}
// Filter image
// z counts source slices; the loop advances zFrom by hand at the bottom of the body
size_t z = 0;
for (FilterFrom* zFrom = tfZ->from; zFrom < zFromEnd; ++z)
{
// Create accumulation slices as needed
for (size_t j = 0; j < zFrom->count; ++j)
{
size_t w = zFrom->to[j].u;
assert(w < ndepth);
TriangleRow* sliceAcc = &sliceActive[w];
if (!sliceAcc->scanline)
{
if (sliceFree)
{
// Steal and reuse scanline from 'free slice' list
// (it will always be at least as large as nwidth*nheight due to loop descending order)
assert(sliceFree->scanline != nullptr);
sliceAcc->scanline.reset(sliceFree->scanline.release());
sliceFree = sliceFree->next;
}
else
{
size_t bytes = sizeof(XMVECTOR) * nwidth * nheight;
sliceAcc->scanline.reset(static_cast<XMVECTOR*>(_aligned_malloc(bytes, 16)));
if (!sliceAcc->scanline)
return E_OUTOFMEMORY;
}
memset(sliceAcc->scanline.get(), 0, sizeof(XMVECTOR) * nwidth * nheight);
}
}
assert(z < depth);
const Image* src = mipChain.GetImage(level - 1, 0, z);
if (!src)
return E_POINTER;
const uint8_t* pSrc = src->pixels;
size_t rowPitch = src->rowPitch;
const uint8_t* pEndSrc = pSrc + rowPitch * height;
// One Y filter record per source row
for (FilterFrom* yFrom = tfY->from; yFrom < yFromEnd; )
{
// Load source scanline
// (guard: the Y filter must not describe more rows than the source image has)
if ((pSrc + rowPitch) > pEndSrc)
return E_FAIL;
if (!_LoadScanlineLinear(row, width, pSrc, rowPitch, src->format, filter))
return E_FAIL;
pSrc += rowPitch;
// Process row
// x counts source texels; xFrom is advanced by hand at the bottom of the body
size_t x = 0;
for (FilterFrom* xFrom = tfX->from; xFrom < xFromEnd; ++x)
{
// Scatter source texel (x, current row, z) into every destination
// slice / row / column it contributes to
for (size_t j = 0; j < zFrom->count; ++j)
{
size_t w = zFrom->to[j].u;
assert(w < ndepth);
float zweight = zFrom->to[j].weight;
XMVECTOR* accSlice = sliceActive[w].scanline.get();
if (!accSlice)
return E_POINTER;
for (size_t k = 0; k < yFrom->count; ++k)
{
size_t v = yFrom->to[k].u;
assert(v < nheight);
float yweight = yFrom->to[k].weight;
XMVECTOR * accPtr = accSlice + v * nwidth;
for (size_t l = 0; l < xFrom->count; ++l)
{
size_t u = xFrom->to[l].u;
assert(u < nwidth);
// Combined weight is the product of the three per-axis weights
XMVECTOR weight = XMVectorReplicate(zweight * yweight * xFrom->to[l].weight);
assert(x < width);
accPtr[u] = XMVectorMultiplyAdd(row[x], weight, accPtr[u]);
}
}
}
xFrom = reinterpret_cast<FilterFrom*>(reinterpret_cast<uint8_t*>(xFrom) + xFrom->sizeInBytes);
}
yFrom = reinterpret_cast<FilterFrom*>(reinterpret_cast<uint8_t*>(yFrom) + yFrom->sizeInBytes);
}
// Write completed accumulation slices
for (size_t j = 0; j < zFrom->count; ++j)
{
size_t w = zFrom->to[j].u;
assert(w < ndepth);
TriangleRow* sliceAcc = &sliceActive[w];
assert(sliceAcc->remaining > 0);
--sliceAcc->remaining;
// When no further source slice contributes, the destination slice is final
if (!sliceAcc->remaining)
{
const Image* dest = mipChain.GetImage(level, 0, w);
XMVECTOR* pAccSrc = sliceAcc->scanline.get();
if (!dest || !pAccSrc)
return E_POINTER;
uint8_t* pDest = dest->pixels;
for (size_t h = 0; h < nheight; ++h)
{
switch (dest->format)
{
case DXGI_FORMAT_R10G10B10A2_UNORM:
case DXGI_FORMAT_R10G10B10A2_UINT:
{
// Need to slightly bias results for floating-point error accumulation which can
// be visible with harshly quantized values
static const XMVECTORF32 Bias = { { { 0.f, 0.f, 0.f, 0.1f } } };
XMVECTOR* ptr = pAccSrc;
for (size_t i = 0; i < dest->width; ++i, ++ptr)
{
*ptr = XMVectorAdd(*ptr, Bias);
}
}
break;
default:
break;
}
// This performs any required clamping
if (!_StoreScanlineLinear(pDest, dest->rowPitch, dest->format, pAccSrc, dest->width, filter))
return E_FAIL;
pDest += dest->rowPitch;
pAccSrc += nwidth;
}
// Put slice on freelist to reuse its allocated scanline
sliceAcc->next = sliceFree;
sliceFree = sliceAcc;
}
}
zFrom = reinterpret_cast<FilterFrom*>(reinterpret_cast<uint8_t*>(zFrom) + zFrom->sizeInBytes);
}
// Dimensions of the level just written become the next source size
if (height > 1)
height >>= 1;
if (width > 1)
width >>= 1;
if (depth > 1)
depth >>= 1;
}
return S_OK;
}
}
//=====================================================================================
// Entry-points
//=====================================================================================
//-------------------------------------------------------------------------------------
// Generate mipmap chain
//-------------------------------------------------------------------------------------
// Generates a full mip chain from a single 1D/2D base image.
//
// baseImage - level-0 image; must have a valid, non-compressed, non-typeless,
//             non-planar, non-palettized format and non-null pixels
// filter    - TEX_FILTER_* flags; the bits under TEX_FILTER_MASK pick the
//             filter (0 selects a default: box for power-of-two sizes,
//             otherwise linear)
// levels    - requested number of mip levels; the adjusted value must be
//             greater than 1
//             (NOTE(review): _CalculateMipLevels presumably expands 0 to a full
//             chain and validates the count - confirm against its definition)
// mipChain  - receives the generated chain
// allow1D   - when true and the base image height is 1, the result is created
//             as a 1D texture instead of a 2D texture
//
// Returns E_INVALIDARG / E_POINTER for bad arguments,
// HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED) for unsupported formats or filters,
// or the failure code of the step that failed. When generation fails after
// mipChain has been initialized, mipChain is released so the caller never
// sees a partially filled chain.
_Use_decl_annotations_
HRESULT DirectX::GenerateMipMaps(
    const Image& baseImage,
    DWORD filter,
    size_t levels,
    ScratchImage& mipChain,
    bool allow1D)
{
    if (!IsValid(baseImage.format))
        return E_INVALIDARG;

    if (!baseImage.pixels)
        return E_POINTER;

    if (!_CalculateMipLevels(baseImage.width, baseImage.height, levels))
        return E_INVALIDARG;

    if (levels <= 1)
        return E_INVALIDARG;

    if (IsCompressed(baseImage.format) || IsTypeless(baseImage.format) || IsPlanar(baseImage.format) || IsPalettized(baseImage.format))
    {
        return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);
    }

    HRESULT hr = E_UNEXPECTED;

    static_assert(TEX_FILTER_POINT == 0x100000, "TEX_FILTER_ flag values don't match TEX_FILTER_MASK");

    if (UseWICFiltering(baseImage.format, filter))
    {
        //--- Use WIC filtering to generate mipmaps -----------------------------------
        switch (filter & TEX_FILTER_MASK)
        {
        case 0:
        case TEX_FILTER_POINT:
        case TEX_FILTER_FANT: // Equivalent to Box filter
        case TEX_FILTER_LINEAR:
        case TEX_FILTER_CUBIC:
        {
            static_assert(TEX_FILTER_FANT == TEX_FILTER_BOX, "TEX_FILTER_ flag alias mismatch");

            WICPixelFormatGUID pfGUID;
            if (_DXGIToWIC(baseImage.format, pfGUID, true))
            {
                // Case 1: Base image format is supported by Windows Imaging Component
                hr = (baseImage.height > 1 || !allow1D)
                    ? mipChain.Initialize2D(baseImage.format, baseImage.width, baseImage.height, 1, levels)
                    : mipChain.Initialize1D(baseImage.format, baseImage.width, 1, levels);
                if (FAILED(hr))
                    return hr;

                // Release on failure, matching the custom-filter paths below and
                // the array overload, so no half-generated chain is handed back
                hr = GenerateMipMapsUsingWIC(baseImage, filter, levels, pfGUID, mipChain, 0);
                if (FAILED(hr))
                    mipChain.Release();
                return hr;
            }
            else
            {
                // Case 2: Base image format is not supported by WIC, so we have to convert, generate, and convert back
                assert(baseImage.format != DXGI_FORMAT_R32G32B32A32_FLOAT);
                ScratchImage temp;
                hr = _ConvertToR32G32B32A32(baseImage, temp);
                if (FAILED(hr))
                    return hr;

                const Image *timg = temp.GetImage(0, 0, 0);
                if (!timg)
                    return E_POINTER;

                // The float chain is local, so a failure below leaves mipChain untouched
                ScratchImage tMipChain;
                hr = (baseImage.height > 1 || !allow1D)
                    ? tMipChain.Initialize2D(DXGI_FORMAT_R32G32B32A32_FLOAT, baseImage.width, baseImage.height, 1, levels)
                    : tMipChain.Initialize1D(DXGI_FORMAT_R32G32B32A32_FLOAT, baseImage.width, 1, levels);
                if (FAILED(hr))
                    return hr;

                hr = GenerateMipMapsUsingWIC(*timg, filter, levels, GUID_WICPixelFormat128bppRGBAFloat, tMipChain, 0);
                if (FAILED(hr))
                    return hr;

                // The converted base image is no longer needed once the float chain exists
                temp.Release();

                return _ConvertFromR32G32B32A32(tMipChain.GetImages(), tMipChain.GetImageCount(), tMipChain.GetMetadata(), baseImage.format, mipChain);
            }
        }

        default:
            return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);
        }
    }
    else
    {
        //--- Use custom filters to generate mipmaps ----------------------------------
        TexMetadata mdata = {};
        mdata.width = baseImage.width;
        if (baseImage.height > 1 || !allow1D)
        {
            mdata.height = baseImage.height;
            mdata.dimension = TEX_DIMENSION_TEXTURE2D;
        }
        else
        {
            mdata.height = 1;
            mdata.dimension = TEX_DIMENSION_TEXTURE1D;
        }
        mdata.depth = mdata.arraySize = 1;
        mdata.mipLevels = levels;
        mdata.format = baseImage.format;

        DWORD filter_select = (filter & TEX_FILTER_MASK);
        if (!filter_select)
        {
            // Default filter choice
            filter_select = (ispow2(baseImage.width) && ispow2(baseImage.height)) ? TEX_FILTER_BOX : TEX_FILTER_LINEAR;
        }

        // Each case sets up the chain with the base image at level 0, runs the
        // selected generator, and releases the chain if generation fails
        switch (filter_select)
        {
        case TEX_FILTER_BOX:
            hr = Setup2DMips(&baseImage, 1, mdata, mipChain);
            if (FAILED(hr))
                return hr;

            hr = Generate2DMipsBoxFilter(levels, filter, mipChain, 0);
            if (FAILED(hr))
                mipChain.Release();
            return hr;

        case TEX_FILTER_POINT:
            hr = Setup2DMips(&baseImage, 1, mdata, mipChain);
            if (FAILED(hr))
                return hr;

            hr = Generate2DMipsPointFilter(levels, mipChain, 0);
            if (FAILED(hr))
                mipChain.Release();
            return hr;

        case TEX_FILTER_LINEAR:
            hr = Setup2DMips(&baseImage, 1, mdata, mipChain);
            if (FAILED(hr))
                return hr;

            hr = Generate2DMipsLinearFilter(levels, filter, mipChain, 0);
            if (FAILED(hr))
                mipChain.Release();
            return hr;

        case TEX_FILTER_CUBIC:
            hr = Setup2DMips(&baseImage, 1, mdata, mipChain);
            if (FAILED(hr))
                return hr;

            hr = Generate2DMipsCubicFilter(levels, filter, mipChain, 0);
            if (FAILED(hr))
                mipChain.Release();
            return hr;

        case TEX_FILTER_TRIANGLE:
            hr = Setup2DMips(&baseImage, 1, mdata, mipChain);
            if (FAILED(hr))
                return hr;

            hr = Generate2DMipsTriangleFilter(levels, filter, mipChain, 0);
            if (FAILED(hr))
                mipChain.Release();
            return hr;

        default:
            return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);
        }
    }
}
_Use_decl_annotations_
HRESULT DirectX::GenerateMipMaps(
const Image* srcImages,
size_t nimages,
const TexMetadata& metadata,
DWORD filter,
size_t levels,
ScratchImage& mipChain)
{
if (!srcImages || !nimages || !IsValid(metadata.format))
return E_INVALIDARG;
if (metadata.IsVolumemap()
|| IsCompressed(metadata.format) || IsTypeless(metadata.format) || IsPlanar(metadata.format) || IsPalettized(metadata.format))
return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);
if (!_CalculateMipLevels(metadata.width, metadata.height, levels))
return E_INVALIDARG;
if (levels <= 1)
return E_INVALIDARG;
std::vector<Image> baseImages;
baseImages.reserve(metadata.arraySize);
for (size_t item = 0; item < metadata.arraySize; ++item)
{
size_t index = metadata.ComputeIndex(0, item, 0);
if (index >= nimages)
return E_FAIL;
const Image& src = srcImages[index];
if (!src.pixels)
return E_POINTER;
if (src.format != metadata.format || src.width != metadata.width || src.height != metadata.height)
{
// All base images must be the same format, width, and height
return E_FAIL;
}
baseImages.push_back(src);
}
assert(baseImages.size() == metadata.arraySize);
HRESULT hr = E_UNEXPECTED;
static_assert(TEX_FILTER_POINT == 0x100000, "TEX_FILTER_ flag values don't match TEX_FILTER_MASK");
if (!metadata.IsPMAlpha() && UseWICFiltering(metadata.format, filter))
{
//--- Use WIC filtering to generate mipmaps -----------------------------------
switch (filter & TEX_FILTER_MASK)
{
case 0:
case TEX_FILTER_POINT:
case TEX_FILTER_FANT: // Equivalent to Box filter
case TEX_FILTER_LINEAR:
case TEX_FILTER_CUBIC:
{
static_assert(TEX_FILTER_FANT == TEX_FILTER_BOX, "TEX_FILTER_ flag alias mismatch");
WICPixelFormatGUID pfGUID;
if (_DXGIToWIC(metadata.format, pfGUID, true))
{
// Case 1: Base image format is supported by Windows Imaging Component
TexMetadata mdata2 = metadata;
mdata2.mipLevels = levels;
hr = mipChain.Initialize(mdata2);
if (FAILED(hr))
return hr;
for (size_t item = 0; item < metadata.arraySize; ++item)
{
hr = GenerateMipMapsUsingWIC(baseImages[item], filter, levels, pfGUID, mipChain, item);
if (FAILED(hr))
{
mipChain.Release();
return hr;
}
}
return S_OK;
}
else
{
// Case 2: Base image format is not supported by WIC, so we have to convert, generate, and convert back
assert(metadata.format != DXGI_FORMAT_R32G32B32A32_FLOAT);
TexMetadata mdata2 = metadata;
mdata2.mipLevels = levels;
mdata2.format = DXGI_FORMAT_R32G32B32A32_FLOAT;
ScratchImage tMipChain;
hr = tMipChain.Initialize(mdata2);
if (FAILED(hr))
return hr;
for (size_t item = 0; item < metadata.arraySize; ++item)
{
ScratchImage temp;
hr = _ConvertToR32G32B32A32(baseImages[item], temp);
if (FAILED(hr))
return hr;
const Image *timg = temp.GetImage(0, 0, 0);
if (!timg)
return E_POINTER;
hr = GenerateMipMapsUsingWIC(*timg, filter, levels, GUID_WICPixelFormat128bppRGBAFloat, tMipChain, item);
if (FAILED(hr))
return hr;
}
return _ConvertFromR32G32B32A32(tMipChain.GetImages(), tMipChain.GetImageCount(), tMipChain.GetMetadata(), metadata.format, mipChain);
}
}
default:
return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);
}
}
else
{
//--- Use custom filters to generate mipmaps ----------------------------------
TexMetadata mdata2 = metadata;
mdata2.mipLevels = levels;
DWORD filter_select = (filter & TEX_FILTER_MASK);
if (!filter_select)
{
// Default filter choice
filter_select = (ispow2(metadata.width) && ispow2(metadata.height)) ? TEX_FILTER_BOX : TEX_FILTER_LINEAR;
}
switch (filter_select)
{
case TEX_FILTER_BOX:
hr = Setup2DMips(&baseImages[0], metadata.arraySize, mdata2, mipChain);
if (FAILED(hr))
return hr;
for (size_t item = 0; item < metadata.arraySize; ++item)
{
hr = Generate2DMipsBoxFilter(levels, filter, mipChain, item);
if (FAILED(hr))
mipChain.Release();
}
return hr;
case TEX_FILTER_POINT:
hr = Setup2DMips(&baseImages[0], metadata.arraySize, mdata2, mipChain);
if (FAILED(hr))
return hr;
for (size_t item = 0; item < metadata.arraySize; ++item)
{
hr = Generate2DMipsPointFilter(levels, mipChain, item);
if (FAILED(hr))
mipChain.Release();
}
return hr;
case TEX_FILTER_LINEAR:
hr = Setup2DMips(&baseImages[0], metadata.arraySize, mdata2, mipChain);
if (FAILED(hr))
return hr;
for (size_t item = 0; item < metadata.arraySize; ++item)
{
hr = Generate2DMipsLinearFilter(levels, filter, mipChain, item);
if (FAILED(hr))
mipChain.Release();
}
return hr;
case TEX_FILTER_CUBIC:
hr = Setup2DMips(&baseImages[0], metadata.arraySize, mdata2, mipChain);
if (FAILED(hr))
return hr;
for (size_t item = 0; item < metadata.arraySize; ++item)
{
hr = Generate2DMipsCubicFilter(levels, filter, mipChain, item);
if (FAILED(hr))
mipChain.Release();
}
return hr;
case TEX_FILTER_TRIANGLE:
hr = Setup2DMips(&baseImages[0], metadata.arraySize, mdata2, mipChain);
if (FAILED(hr))
return hr;
for (size_t item = 0; item < metadata.arraySize; ++item)
{
hr = Generate2DMipsTriangleFilter(levels, filter, mipChain, item);
if (FAILED(hr))
mipChain.Release();
}
return hr;
default:
return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);
}
}
}
//-------------------------------------------------------------------------------------
// Generate mipmap chain for volume texture
//
// This overload receives the base-level slices directly: 'baseImages' must point at
// 'depth' images that all share the format, width, and height of the first one.
//
// Returns:
//   E_INVALIDARG         - null 'baseImages', zero 'depth', or a level count that is
//                          rejected by _CalculateMipLevels3D / ends up <= 1
//   E_POINTER            - a slice has no pixel data
//   E_FAIL               - a slice differs from the first slice in format or size
//   ERROR_NOT_SUPPORTED  - TEX_FILTER_FORCE_WIC requested, the format is compressed,
//                          typeless, planar or palettized, or the filter is unknown
//   otherwise the result of Setup3DMips / the selected Generate3DMips*Filter helper
//-------------------------------------------------------------------------------------
_Use_decl_annotations_
HRESULT DirectX::GenerateMipMaps3D(
    const Image* baseImages,
    size_t depth,
    DWORD filter,
    size_t levels,
    ScratchImage& mipChain)
{
    if (!baseImages || !depth)
        return E_INVALIDARG;

    // Forcing WIC is rejected outright for volume textures
    if (filter & TEX_FILTER_FORCE_WIC)
        return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);

    // The first slice is the reference every other slice is compared against
    const Image& reference = baseImages[0];
    const DXGI_FORMAT format = reference.format;
    const size_t width = reference.width;
    const size_t height = reference.height;

    // 'levels' is passed by name and re-checked afterwards, so the helper may adjust it
    if (!_CalculateMipLevels3D(width, height, depth, levels) || levels <= 1)
        return E_INVALIDARG;

    for (size_t z = 0; z < depth; ++z)
    {
        const Image& slice = baseImages[z];

        if (!slice.pixels)
            return E_POINTER;

        // All base images must be the same format, width, and height
        const bool matchesReference =
            (slice.format == format) && (slice.width == width) && (slice.height == height);
        if (!matchesReference)
            return E_FAIL;
    }

    if (IsCompressed(format) || IsTypeless(format) || IsPlanar(format) || IsPalettized(format))
        return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);

    static_assert(TEX_FILTER_POINT == 0x100000, "TEX_FILTER_ flag values don't match TEX_FILTER_MASK");

    DWORD filterKind = (filter & TEX_FILTER_MASK);
    if (filterKind == 0)
    {
        // Default filter choice: box for all-power-of-two volumes, triangle otherwise
        const bool allPow2 = ispow2(width) && ispow2(height) && ispow2(depth);
        filterKind = allPow2 ? TEX_FILTER_BOX : TEX_FILTER_TRIANGLE;
    }

    // Turn away unknown filters before anything is written to mipChain
    switch (filterKind)
    {
    case TEX_FILTER_BOX:
    case TEX_FILTER_POINT:
    case TEX_FILTER_LINEAR:
    case TEX_FILTER_CUBIC:
    case TEX_FILTER_TRIANGLE:
        break;

    default:
        return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);
    }

    HRESULT hr = Setup3DMips(baseImages, depth, levels, mipChain);
    if (FAILED(hr))
        return hr;

    switch (filterKind)
    {
    case TEX_FILTER_BOX:
        hr = Generate3DMipsBoxFilter(depth, levels, filter, mipChain);
        break;

    case TEX_FILTER_POINT:
        hr = Generate3DMipsPointFilter(depth, levels, mipChain);
        break;

    case TEX_FILTER_LINEAR:
        hr = Generate3DMipsLinearFilter(depth, levels, filter, mipChain);
        break;

    case TEX_FILTER_CUBIC:
        hr = Generate3DMipsCubicFilter(depth, levels, filter, mipChain);
        break;

    case TEX_FILTER_TRIANGLE:
        hr = Generate3DMipsTriangleFilter(depth, levels, filter, mipChain);
        break;

    default:
        // Not reachable: every other value was rejected by the switch above
        hr = E_UNEXPECTED;
        break;
    }

    // A chain whose generation failed is not handed back to the caller
    if (FAILED(hr))
        mipChain.Release();

    return hr;
}
// Generates a mipmap chain for the volume texture described by 'metadata'.
//
//   srcImages, nimages - image array indexed through metadata.ComputeIndex(); the
//                        mip 0 image of each of the metadata.depth slices is the base
//   filter             - TEX_FILTER_* flags; when no filter bits are set, box is used
//                        for all-power-of-two volumes and triangle otherwise
//   levels             - requested mip count, checked by _CalculateMipLevels3D and
//                        required to be greater than 1 afterwards
//   mipChain           - receives the generated chain; released if generation fails
//
// Returns:
//   E_INVALIDARG         - null/empty 'srcImages', zero metadata.depth, invalid format,
//                          or an unusable level count
//   ERROR_NOT_SUPPORTED  - TEX_FILTER_FORCE_WIC requested, metadata is not a volume map,
//                          the format is compressed, typeless, planar or palettized,
//                          or the filter is unknown
//   E_FAIL               - a slice index lies outside 'srcImages', or a slice does not
//                          match metadata in format, width, or height
//   E_POINTER            - a slice has no pixel data
//   otherwise the result of Setup3DMips / the selected Generate3DMips*Filter helper
_Use_decl_annotations_
HRESULT DirectX::GenerateMipMaps3D(
    const Image* srcImages,
    size_t nimages,
    const TexMetadata& metadata,
    DWORD filter,
    size_t levels,
    ScratchImage& mipChain)
{
    // A zero-depth volume has no base slice to work from. It is rejected here, as the
    // baseImages/depth overload does, so the slice vector built below is never empty
    // when its storage is passed on.
    if (!srcImages || !nimages || !metadata.depth || !IsValid(metadata.format))
        return E_INVALIDARG;

    // Forcing WIC is rejected outright for volume textures
    if (filter & TEX_FILTER_FORCE_WIC)
        return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);

    if (!metadata.IsVolumemap()
        || IsCompressed(metadata.format) || IsTypeless(metadata.format) || IsPlanar(metadata.format) || IsPalettized(metadata.format))
        return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);

    if (!_CalculateMipLevels3D(metadata.width, metadata.height, metadata.depth, levels))
        return E_INVALIDARG;

    if (levels <= 1)
        return E_INVALIDARG;

    // Collect the top-level image of every depth slice into one contiguous array
    std::vector<Image> baseImages;
    baseImages.reserve(metadata.depth);
    for (size_t slice = 0; slice < metadata.depth; ++slice)
    {
        const size_t index = metadata.ComputeIndex(0, 0, slice);
        if (index >= nimages)
            return E_FAIL;

        const Image& src = srcImages[index];
        if (!src.pixels)
            return E_POINTER;

        if (src.format != metadata.format || src.width != metadata.width || src.height != metadata.height)
        {
            // All base images must be the same format, width, and height
            return E_FAIL;
        }

        baseImages.push_back(src);
    }

    assert(baseImages.size() == metadata.depth);

    static_assert(TEX_FILTER_POINT == 0x100000, "TEX_FILTER_ flag values don't match TEX_FILTER_MASK");

    DWORD filter_select = (filter & TEX_FILTER_MASK);
    if (!filter_select)
    {
        // Default filter choice
        filter_select = (ispow2(metadata.width) && ispow2(metadata.height) && ispow2(metadata.depth)) ? TEX_FILTER_BOX : TEX_FILTER_TRIANGLE;
    }

    // Turn away unknown filters before anything is written to mipChain
    switch (filter_select)
    {
    case TEX_FILTER_BOX:
    case TEX_FILTER_POINT:
    case TEX_FILTER_LINEAR:
    case TEX_FILTER_CUBIC:
    case TEX_FILTER_TRIANGLE:
        break;

    default:
        return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);
    }

    HRESULT hr = Setup3DMips(baseImages.data(), metadata.depth, levels, mipChain);
    if (FAILED(hr))
        return hr;

    switch (filter_select)
    {
    case TEX_FILTER_BOX:
        hr = Generate3DMipsBoxFilter(metadata.depth, levels, filter, mipChain);
        break;

    case TEX_FILTER_POINT:
        hr = Generate3DMipsPointFilter(metadata.depth, levels, mipChain);
        break;

    case TEX_FILTER_LINEAR:
        hr = Generate3DMipsLinearFilter(metadata.depth, levels, filter, mipChain);
        break;

    case TEX_FILTER_CUBIC:
        hr = Generate3DMipsCubicFilter(metadata.depth, levels, filter, mipChain);
        break;

    case TEX_FILTER_TRIANGLE:
        hr = Generate3DMipsTriangleFilter(metadata.depth, levels, filter, mipChain);
        break;

    default:
        // Not reachable: every other value was rejected by the switch above
        hr = E_UNEXPECTED;
        break;
    }

    // A chain whose generation failed is not handed back to the caller
    if (FAILED(hr))
        mipChain.Release();

    return hr;
}