Avoid some unnecessary D3D state changes.

* Add check to make sure we're not setting the same constant buffer
* Add check to make sure we're not setting the same pipeline state
* Cache descriptor tables to avoid unnecessary descriptor copies
  and GPU descriptor table heap entries.

Change-Id: Ia68dacb347736287c24862c74fd3e231288f5c94
Bug: skia:10508
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/303663
Reviewed-by: Greg Daniel <egdaniel@google.com>
Commit-Queue: Jim Van Verth <jvanverth@google.com>
This commit is contained in:
Jim Van Verth 2020-07-20 16:33:26 -04:00 committed by Skia Commit-Bot
parent 5333784c8c
commit dc945ea077
9 changed files with 146 additions and 48 deletions

View File

@ -204,6 +204,7 @@ std::unique_ptr<GrD3DDirectCommandList> GrD3DDirectCommandList::Make(ID3D12Devic
GrD3DDirectCommandList::GrD3DDirectCommandList(gr_cp<ID3D12CommandAllocator> allocator,
gr_cp<ID3D12GraphicsCommandList> commandList)
: GrD3DCommandList(std::move(allocator), std::move(commandList))
, fCurrentPipelineState(nullptr)
, fCurrentRootSignature(nullptr)
, fCurrentVertexBuffer(nullptr)
, fCurrentVertexStride(0)
@ -211,11 +212,14 @@ GrD3DDirectCommandList::GrD3DDirectCommandList(gr_cp<ID3D12CommandAllocator> all
, fCurrentInstanceStride(0)
, fCurrentIndexBuffer(nullptr)
, fCurrentConstantRingBuffer(nullptr)
, fCurrentConstantBufferAddress(0)
, fCurrentSRVCRVDescriptorHeap(nullptr)
, fCurrentSamplerDescriptorHeap(nullptr) {
sk_bzero(fCurrentRootDescriptorTable, sizeof(fCurrentRootDescriptorTable));
}
void GrD3DDirectCommandList::onReset() {
fCurrentPipelineState = nullptr;
fCurrentRootSignature = nullptr;
fCurrentVertexBuffer = nullptr;
fCurrentVertexStride = 0;
@ -226,14 +230,19 @@ void GrD3DDirectCommandList::onReset() {
fCurrentConstantRingBuffer->finishSubmit(fConstantRingBufferSubmitData);
fCurrentConstantRingBuffer = nullptr;
}
fCurrentConstantBufferAddress = 0;
sk_bzero(fCurrentRootDescriptorTable, sizeof(fCurrentRootDescriptorTable));
fCurrentSRVCRVDescriptorHeap = nullptr;
fCurrentSamplerDescriptorHeap = nullptr;
}
// Makes |pipelineState| the current pipeline state object on the D3D12 command list.
//
// The call is skipped when |pipelineState| is the object that was bound last, as tracked by
// fCurrentPipelineState. When a new state is bound, ownership of the reference is handed to
// addResource().
void GrD3DDirectCommandList::setPipelineState(sk_sp<GrD3DPipelineState> pipelineState) {
    SkASSERT(fIsActive);
    // Grab the raw pointer up front: once the sk_sp has been moved into addResource(),
    // pipelineState.get() returns nullptr and the cache would never register a hit.
    const GrD3DPipelineState* newState = pipelineState.get();
    if (newState != fCurrentPipelineState) {
        fCommandList->SetPipelineState(pipelineState->pipelineState());
        this->addResource(std::move(pipelineState));
        fCurrentPipelineState = newState;
    }
}
void GrD3DDirectCommandList::setCurrentConstantBuffer(
@ -432,16 +441,26 @@ void GrD3DDirectCommandList::resolveSubresourceRegion(const GrD3DTextureResource
}
}
// Binds the constant buffer at |bufferLocation| to root parameter |rootParameterIndex|.
//
// Only the kConstantBufferView root parameter is supported (asserted in debug builds). The
// D3D call is issued only when |bufferLocation| differs from the address bound last, which is
// tracked in fCurrentConstantBufferAddress.
//
// NOTE(review): D3D12 is documented to drop root arguments when the root signature changes;
// confirm that fCurrentConstantBufferAddress is cleared wherever the root signature is set
// (that code is not visible here).
void GrD3DDirectCommandList::setGraphicsRootConstantBufferView(
        unsigned int rootParameterIndex, D3D12_GPU_VIRTUAL_ADDRESS bufferLocation) {
    SkASSERT(rootParameterIndex ==
             static_cast<unsigned int>(GrD3DRootSignature::ParamIndex::kConstantBufferView));
    if (bufferLocation != fCurrentConstantBufferAddress) {
        fCommandList->SetGraphicsRootConstantBufferView(rootParameterIndex, bufferLocation);
        fCurrentConstantBufferAddress = bufferLocation;
    }
}
// Binds the descriptor table starting at |baseDescriptor| to root parameter
// |rootParameterIndex|.
//
// |rootParameterIndex| must be the sampler or the texture descriptor table slot (asserted in
// debug builds). The last handle bound to each slot is remembered in
// fCurrentRootDescriptorTable, and the D3D call is skipped when the handle is unchanged.
//
// NOTE(review): the cached handles presumably need to be invalidated when the root signature
// or the bound descriptor heaps change; confirm against the callers that set those.
void GrD3DDirectCommandList::setGraphicsRootDescriptorTable(
        unsigned int rootParameterIndex, D3D12_GPU_DESCRIPTOR_HANDLE baseDescriptor) {
    SkASSERT(rootParameterIndex ==
                     static_cast<unsigned int>(
                             GrD3DRootSignature::ParamIndex::kSamplerDescriptorTable) ||
             rootParameterIndex ==
                     static_cast<unsigned int>(
                             GrD3DRootSignature::ParamIndex::kTextureDescriptorTable));
    if (fCurrentRootDescriptorTable[rootParameterIndex].ptr != baseDescriptor.ptr) {
        fCommandList->SetGraphicsRootDescriptorTable(rootParameterIndex, baseDescriptor);
        fCurrentRootDescriptorTable[rootParameterIndex] = baseDescriptor;
    }
}
void GrD3DDirectCommandList::setDescriptorHeaps(sk_sp<GrRecycledResource> srvCrvHeapResource,

View File

@ -13,6 +13,7 @@
#include "include/private/SkColorData.h"
#include "src/gpu/GrManagedResource.h"
#include "src/gpu/d3d/GrD3DConstantRingBuffer.h"
#include "src/gpu/d3d/GrD3DRootSignature.h"
#include <memory>
@ -186,6 +187,7 @@ private:
void onReset() override;
const GrD3DPipelineState* fCurrentPipelineState;
const GrD3DRootSignature* fCurrentRootSignature;
const GrBuffer* fCurrentVertexBuffer;
size_t fCurrentVertexStride;
@ -196,6 +198,8 @@ private:
GrD3DConstantRingBuffer* fCurrentConstantRingBuffer;
GrD3DConstantRingBuffer::SubmitData fConstantRingBufferSubmitData;
D3D12_GPU_VIRTUAL_ADDRESS fCurrentConstantBufferAddress;
D3D12_GPU_DESCRIPTOR_HANDLE fCurrentRootDescriptorTable[GrD3DRootSignature::kParamIndexCount];
const ID3D12DescriptorHeap* fCurrentSRVCRVDescriptorHeap;
const ID3D12DescriptorHeap* fCurrentSamplerDescriptorHeap;
};

View File

@ -13,17 +13,17 @@ GrD3DDescriptorTableManager::GrD3DDescriptorTableManager(GrD3DGpu* gpu)
: fCBVSRVDescriptorPool(gpu, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)
, fSamplerDescriptorPool(gpu, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) {}
std::unique_ptr<GrD3DDescriptorTable>
// Allocates a table of |size| descriptors from the CBV/SRV/UAV pool and returns it as a
// ref-counted object. setHeaps() is called after the allocation and before returning.
sk_sp<GrD3DDescriptorTable>
GrD3DDescriptorTableManager::createShaderOrConstantResourceTable(GrD3DGpu* gpu,
                                                                 unsigned int size) {
    sk_sp<GrD3DDescriptorTable> table = fCBVSRVDescriptorPool.allocateTable(gpu, size);
    this->setHeaps(gpu);
    return table;
}
std::unique_ptr<GrD3DDescriptorTable> GrD3DDescriptorTableManager::createSamplerTable(
// Allocates a table of |size| descriptors from the sampler pool and returns it as a
// ref-counted object. setHeaps() is called after the allocation and before returning.
sk_sp<GrD3DDescriptorTable> GrD3DDescriptorTableManager::createSamplerTable(
        GrD3DGpu* gpu, unsigned int size) {
    sk_sp<GrD3DDescriptorTable> table = fSamplerDescriptorPool.allocateTable(gpu, size);
    this->setHeaps(gpu);
    return table;
}
@ -74,12 +74,12 @@ sk_sp<GrD3DDescriptorTableManager::Heap> GrD3DDescriptorTableManager::Heap::Make
return sk_sp< GrD3DDescriptorTableManager::Heap>(new Heap(gpu, heap, type, descriptorCount));
}
std::unique_ptr<GrD3DDescriptorTable> GrD3DDescriptorTableManager::Heap::allocateTable(
// Carves the next |count| descriptors out of this heap and wraps them in a ref-counted
// descriptor table. The caller must ensure the heap has room (asserted in debug builds).
sk_sp<GrD3DDescriptorTable> GrD3DDescriptorTableManager::Heap::allocateTable(
        unsigned int count) {
    SkASSERT(fDescriptorCount - fNextAvailable >= count);
    unsigned int startIndex = fNextAvailable;
    fNextAvailable += count;
    return sk_sp<GrD3DDescriptorTable>(
            new GrD3DDescriptorTable(fHeap->getCPUHandle(startIndex).fHandle,
                                     fHeap->getGPUHandle(startIndex).fHandle, fType));
}
@ -97,7 +97,7 @@ GrD3DDescriptorTableManager::HeapPool::HeapPool(GrD3DGpu* gpu, D3D12_DESCRIPTOR_
fDescriptorHeaps.push_back(heap);
}
std::unique_ptr<GrD3DDescriptorTable> GrD3DDescriptorTableManager::HeapPool::allocateTable(
sk_sp<GrD3DDescriptorTable> GrD3DDescriptorTableManager::HeapPool::allocateTable(
GrD3DGpu* gpu, unsigned int count) {
// In back-to-front order, iterate through heaps until we find one we can allocate from.
// Any heap we can't allocate from gets removed from the list.

View File

@ -14,7 +14,7 @@ class GrD3DCommandList;
class GrD3DDirectCommandList;
class GrD3DGpu;
class GrD3DDescriptorTable {
class GrD3DDescriptorTable : public SkRefCnt {
public:
GrD3DDescriptorTable(D3D12_CPU_DESCRIPTOR_HANDLE baseCPU, D3D12_GPU_DESCRIPTOR_HANDLE baseGPU,
D3D12_DESCRIPTOR_HEAP_TYPE type)
@ -42,9 +42,8 @@ class GrD3DDescriptorTableManager {
public:
GrD3DDescriptorTableManager(GrD3DGpu*);
std::unique_ptr<GrD3DDescriptorTable> createShaderOrConstantResourceTable(GrD3DGpu*,
unsigned int count);
std::unique_ptr<GrD3DDescriptorTable> createSamplerTable(GrD3DGpu*, unsigned int count);
sk_sp<GrD3DDescriptorTable> createShaderOrConstantResourceTable(GrD3DGpu*, unsigned int count);
sk_sp<GrD3DDescriptorTable> createSamplerTable(GrD3DGpu*, unsigned int count);
void prepForSubmit(GrD3DGpu* gpu);
@ -54,7 +53,7 @@ private:
static sk_sp<Heap> Make(GrD3DGpu* gpu, D3D12_DESCRIPTOR_HEAP_TYPE type,
unsigned int numDescriptors);
std::unique_ptr<GrD3DDescriptorTable> allocateTable(unsigned int count);
sk_sp<GrD3DDescriptorTable> allocateTable(unsigned int count);
bool canAllocate(unsigned int count) const {
return (fDescriptorCount - fNextAvailable) >= count;
}
@ -100,7 +99,7 @@ private:
public:
HeapPool(GrD3DGpu*, D3D12_DESCRIPTOR_HEAP_TYPE);
std::unique_ptr<GrD3DDescriptorTable> allocateTable(GrD3DGpu*, unsigned int count);
sk_sp<GrD3DDescriptorTable> allocateTable(GrD3DGpu*, unsigned int count);
void recycle(sk_sp<Heap>);
sk_sp<Heap>& currentDescriptorHeap();
void prepForSubmit(GrD3DGpu* gpu);

View File

@ -94,9 +94,8 @@ void GrD3DPipelineState::setAndBindTextures(GrD3DGpu* gpu, const GrPrimitiveProc
const GrPipeline& pipeline) {
SkASSERT(primProcTextures || !primProc.numTextureSamplers());
SkAutoSTMalloc<8, D3D12_CPU_DESCRIPTOR_HANDLE> shaderResourceViews(fNumSamplers);
SkAutoSTMalloc<8, D3D12_CPU_DESCRIPTOR_HANDLE> samplers(fNumSamplers);
SkAutoSTMalloc<8, unsigned int> rangeSizes(fNumSamplers);
std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> shaderResourceViews(fNumSamplers);
std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> samplers(fNumSamplers);
unsigned int currTextureBinding = 0;
for (int i = 0; i < primProc.numTextureSamplers(); ++i) {
@ -104,29 +103,26 @@ void GrD3DPipelineState::setAndBindTextures(GrD3DGpu* gpu, const GrPrimitiveProc
const auto& sampler = primProc.textureSampler(i);
auto texture = static_cast<GrD3DTexture*>(primProcTextures[i]->peekTexture());
shaderResourceViews[currTextureBinding] = texture->shaderResourceView();
samplers[currTextureBinding] =
samplers[currTextureBinding++] =
gpu->resourceProvider().findOrCreateCompatibleSampler(sampler.samplerState());
gpu->currentCommandList()->addSampledTextureRef(texture);
rangeSizes[currTextureBinding++] = 1;
}
pipeline.visitTextureEffects([&](const GrTextureEffect& te) {
GrSamplerState samplerState = te.samplerState();
auto* texture = static_cast<GrD3DTexture*>(te.texture());
shaderResourceViews[currTextureBinding] = texture->shaderResourceView();
samplers[currTextureBinding] =
samplers[currTextureBinding++] =
gpu->resourceProvider().findOrCreateCompatibleSampler(samplerState);
gpu->currentCommandList()->addSampledTextureRef(texture);
rangeSizes[currTextureBinding++] = 1;
});
if (GrTexture* dstTexture = pipeline.peekDstTexture()) {
auto texture = static_cast<GrD3DTexture*>(dstTexture);
shaderResourceViews[currTextureBinding] = texture->shaderResourceView();
samplers[currTextureBinding] = gpu->resourceProvider().findOrCreateCompatibleSampler(
samplers[currTextureBinding++] = gpu->resourceProvider().findOrCreateCompatibleSampler(
GrSamplerState::Filter::kNearest);
gpu->currentCommandList()->addSampledTextureRef(texture);
rangeSizes[currTextureBinding++] = 1;
}
SkASSERT(fNumSamplers == currTextureBinding);
@ -134,21 +130,15 @@ void GrD3DPipelineState::setAndBindTextures(GrD3DGpu* gpu, const GrPrimitiveProc
// fill in descriptor tables and bind to root signature
if (fNumSamplers > 0) {
// set up and bind shader resource view table
std::unique_ptr<GrD3DDescriptorTable> srvTable =
gpu->resourceProvider().createShaderOrConstantResourceTable(fNumSamplers);
gpu->device()->CopyDescriptors(1, srvTable->baseCpuDescriptorPtr(), &fNumSamplers,
fNumSamplers, shaderResourceViews.get(), rangeSizes.get(),
srvTable->type());
sk_sp<GrD3DDescriptorTable> srvTable =
gpu->resourceProvider().findOrCreateShaderResourceTable(shaderResourceViews);
gpu->currentCommandList()->setGraphicsRootDescriptorTable(
static_cast<unsigned int>(GrD3DRootSignature::ParamIndex::kTextureDescriptorTable),
srvTable->baseGpuDescriptor());
// set up and bind sampler table
std::unique_ptr<GrD3DDescriptorTable> samplerTable =
gpu->resourceProvider().createSamplerTable(fNumSamplers);
gpu->device()->CopyDescriptors(1, samplerTable->baseCpuDescriptorPtr(), &fNumSamplers,
fNumSamplers, samplers.get(), rangeSizes.get(),
samplerTable->type());
sk_sp<GrD3DDescriptorTable> samplerTable =
gpu->resourceProvider().findOrCreateSamplerTable(samplers);
gpu->currentCommandList()->setGraphicsRootDescriptorTable(
static_cast<unsigned int>(GrD3DRootSignature::ParamIndex::kSamplerDescriptorTable),
samplerTable->baseGpuDescriptor());

View File

@ -9,6 +9,7 @@
#include "include/gpu/GrContextOptions.h"
#include "include/gpu/GrDirectContext.h"
#include "include/private/SkOpts_spi.h"
#include "src/gpu/GrContextPriv.h"
#include "src/gpu/d3d/GrD3DBuffer.h"
#include "src/gpu/d3d/GrD3DCommandList.h"
@ -20,7 +21,10 @@ GrD3DResourceProvider::GrD3DResourceProvider(GrD3DGpu* gpu)
: fGpu(gpu)
, fCpuDescriptorManager(gpu)
, fDescriptorTableManager(gpu)
, fPipelineStateCache(new PipelineStateCache(gpu)) {}
, fPipelineStateCache(new PipelineStateCache(gpu))
, fShaderResourceDescriptorTableCache(gpu)
, fSamplerDescriptorTableCache(gpu) {
}
void GrD3DResourceProvider::destroyResources() {
fSamplers.reset();
@ -153,13 +157,22 @@ D3D12_CPU_DESCRIPTOR_HANDLE GrD3DResourceProvider::findOrCreateCompatibleSampler
return sampler;
}
std::unique_ptr<GrD3DDescriptorTable> GrD3DResourceProvider::createShaderOrConstantResourceTable(
unsigned int size) {
return fDescriptorTableManager.createShaderOrConstantResourceTable(fGpu, size);
// Returns a descriptor table for |shaderResourceViews| from the shader-resource table cache.
// The cache is given an allocator that asks the descriptor table manager for a new
// shader/constant resource table.
sk_sp<GrD3DDescriptorTable> GrD3DResourceProvider::findOrCreateShaderResourceTable(
        const std::vector<D3D12_CPU_DESCRIPTOR_HANDLE>& shaderResourceViews) {
    auto allocateSrvTable = [this](GrD3DGpu* gpu, unsigned int descriptorCount) {
        return fDescriptorTableManager.createShaderOrConstantResourceTable(gpu,
                                                                           descriptorCount);
    };
    return fShaderResourceDescriptorTableCache.findOrCreateDescTable(shaderResourceViews,
                                                                     allocateSrvTable);
}
std::unique_ptr<GrD3DDescriptorTable> GrD3DResourceProvider::createSamplerTable(unsigned int size) {
return fDescriptorTableManager.createSamplerTable(fGpu, size);
// Returns a descriptor table for |samplers| from the sampler table cache. The cache is given
// an allocator that asks the descriptor table manager for a new sampler table.
sk_sp<GrD3DDescriptorTable> GrD3DResourceProvider::findOrCreateSamplerTable(
        const std::vector<D3D12_CPU_DESCRIPTOR_HANDLE>& samplers) {
    auto createFunc = [this](GrD3DGpu* gpu, unsigned int numDesc) {
        return this->fDescriptorTableManager.createSamplerTable(gpu, numDesc);
    };
    // Sampler tables live in their own cache; looking them up in the shader-resource cache
    // would mix tables of two heap types in one map and leave the sampler cache unused.
    return fSamplerDescriptorTableCache.findOrCreateDescTable(samplers, createFunc);
}
sk_sp<GrD3DPipelineState> GrD3DResourceProvider::findOrCreateCompatiblePipelineState(
@ -193,6 +206,10 @@ D3D12_GPU_VIRTUAL_ADDRESS GrD3DResourceProvider::uploadConstantData(void* data,
// Gets provider-owned state ready for submission: passes fConstantBuffer to the current
// command list via setCurrentConstantBuffer(), forwards to the descriptor table manager's
// prepForSubmit(), and empties both descriptor table caches.
void GrD3DResourceProvider::prepForSubmit() {
fGpu->currentCommandList()->setCurrentConstantBuffer(fConstantBuffer);
fDescriptorTableManager.prepForSubmit(fGpu);
// Any heap memory used for these will be returned when the command buffer finishes,
// so we have to invalidate all entries.
fShaderResourceDescriptorTableCache.release();
fSamplerDescriptorTableCache.release();
}
////////////////////////////////////////////////////////////////////////////////////////////////
@ -275,3 +292,26 @@ void GrD3DResourceProvider::PipelineStateCache::markPipelineStateUniformsDirty()
});
}
////////////////////////////////////////////////////////////////////////////////////////////////
// Empties the cache by resetting fMap.
void GrD3DResourceProvider::DescriptorTableCache::release() {
fMap.reset();
}
// Returns the descriptor table holding copies of |cpuDescriptors|, creating it on a miss.
//
// On a cache hit the stored table is returned unchanged. On a miss, |createFunc| is asked for
// a table with one slot per entry of |cpuDescriptors|, the descriptors are copied into it
// with CopyDescriptors (one destination range; every source range has size 1), and the table
// is inserted into the cache under |cpuDescriptors|.
sk_sp<GrD3DDescriptorTable> GrD3DResourceProvider::DescriptorTableCache::findOrCreateDescTable(
        const std::vector<D3D12_CPU_DESCRIPTOR_HANDLE>& cpuDescriptors,
        std::function<sk_sp<GrD3DDescriptorTable>(GrD3DGpu*, unsigned int numDesc)> createFunc) {
    if (sk_sp<GrD3DDescriptorTable>* cached = fMap.find(cpuDescriptors)) {
        return *cached;
    }

    const unsigned int numDescriptors = static_cast<unsigned int>(cpuDescriptors.size());

    // fRangeSizes is a preallocated array of kRangeSizesCount ones. For larger descriptor
    // sets build a correctly sized array instead of letting CopyDescriptors read past the
    // end of fRangeSizes (a debug-only assert does not protect release builds).
    const unsigned int* rangeSizes = fRangeSizes;
    std::vector<unsigned int> overflowRangeSizes;
    if (numDescriptors > static_cast<unsigned int>(kRangeSizesCount)) {
        overflowRangeSizes.assign(numDescriptors, 1);
        rangeSizes = overflowRangeSizes.data();
    }

    sk_sp<GrD3DDescriptorTable> descTable = createFunc(fGpu, numDescriptors);
    fGpu->device()->CopyDescriptors(1, descTable->baseCpuDescriptorPtr(), &numDescriptors,
                                    numDescriptors, cpuDescriptors.data(), rangeSizes,
                                    descTable->type());
    sk_sp<GrD3DDescriptorTable>* inserted = fMap.insert(cpuDescriptors, std::move(descTable));
    return *inserted;
}

View File

@ -18,6 +18,7 @@
#include "src/gpu/d3d/GrD3DCpuDescriptorManager.h"
#include "src/gpu/d3d/GrD3DDescriptorTableManager.h"
#include "src/gpu/d3d/GrD3DRootSignature.h"
#include "src/gpu/d3d/GrD3DUtil.h"
#include <memory>
@ -56,9 +57,10 @@ public:
D3D12_CPU_DESCRIPTOR_HANDLE findOrCreateCompatibleSampler(const GrSamplerState& params);
std::unique_ptr<GrD3DDescriptorTable> createShaderOrConstantResourceTable(unsigned int size);
std::unique_ptr<GrD3DDescriptorTable> createSamplerTable(unsigned int size);
sk_sp<GrD3DDescriptorTable> findOrCreateShaderResourceTable(
const std::vector<D3D12_CPU_DESCRIPTOR_HANDLE>& shaderResourceViews);
sk_sp<GrD3DDescriptorTable> findOrCreateSamplerTable(
const std::vector<D3D12_CPU_DESCRIPTOR_HANDLE>& samplers);
GrD3DDescriptorTableManager* descriptorTableMgr() {
return &fDescriptorTableManager;
}
@ -109,6 +111,39 @@ private:
#endif
};
class DescriptorTableCache : public ::SkNoncopyable {
public:
DescriptorTableCache(GrD3DGpu* gpu) : fGpu(gpu), fMap(64) {
// Initialize the array we pass into CopyDescriptors for ranges.
// At the moment any descriptor we pass into CopyDescriptors is only itself,
// not the beginning of a range, so each range size is always 1.
for (int i = 0; i < kRangeSizesCount; ++i) {
fRangeSizes[i] = 1;
}
}
~DescriptorTableCache() = default;
void release();
typedef std::function<sk_sp<GrD3DDescriptorTable>(GrD3DGpu*, unsigned int)> CreateFunc;
sk_sp<GrD3DDescriptorTable> findOrCreateDescTable(
const std::vector<D3D12_CPU_DESCRIPTOR_HANDLE>&, CreateFunc);
private:
GrD3DGpu* fGpu;
typedef std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> DescTableKey;
typedef sk_sp<GrD3DDescriptorTable> DescTableValue;
struct DescTableHash {
uint32_t operator()(DescTableKey key) const {
return SkOpts::hash_fn(key.data(),
key.size()*sizeof(D3D12_CPU_DESCRIPTOR_HANDLE), 0);
}
};
SkLRUCache<DescTableKey, DescTableValue, DescTableHash> fMap;
static constexpr int kRangeSizesCount = 8;
unsigned int fRangeSizes[kRangeSizesCount];
};
GrD3DGpu* fGpu;
SkSTArray<4, std::unique_ptr<GrD3DDirectCommandList>> fAvailableDirectCommandLists;
@ -123,6 +158,9 @@ private:
std::unique_ptr<PipelineStateCache> fPipelineStateCache;
SkTHashMap<uint32_t, D3D12_CPU_DESCRIPTOR_HANDLE> fSamplers;
DescriptorTableCache fShaderResourceDescriptorTableCache;
DescriptorTableCache fSamplerDescriptorTableCache;
};
#endif

View File

@ -20,8 +20,11 @@ public:
// Root parameter slots, in root-parameter-index order. kLast aliases the highest slot so
// that kParamIndexCount stays correct when a slot is appended.
enum class ParamIndex {
    kConstantBufferView = 0,
    kSamplerDescriptorTable = 1,
    kTextureDescriptorTable = 2,

    kLast = kTextureDescriptorTable
};
// Number of root parameter slots; usable as an array size indexed by ParamIndex.
static constexpr unsigned int kParamIndexCount = static_cast<unsigned int>(ParamIndex::kLast) + 1;
bool isCompatible(int numTextureSamplers) const;

View File

@ -74,5 +74,10 @@ static constexpr const char* GrDxgiFormatToStr(DXGI_FORMAT dxgiFormat) {
}
}
// Equality for CPU descriptor handles: two handles are equal when their ptr fields match.
static constexpr bool operator==(const D3D12_CPU_DESCRIPTOR_HANDLE& first,
const D3D12_CPU_DESCRIPTOR_HANDLE& second) {
return first.ptr == second.ptr;
}
#endif
#endif