diff --git a/gn/gpu.gni b/gn/gpu.gni index 95d11f2306..54a8e0b429 100644 --- a/gn/gpu.gni +++ b/gn/gpu.gni @@ -229,6 +229,8 @@ skia_gpu_sources = [ "$_src/gpu/GrTracing.h", "$_src/gpu/GrTransferFromRenderTask.cpp", "$_src/gpu/GrTransferFromRenderTask.h", + "$_src/gpu/GrUniformAggregator.cpp", + "$_src/gpu/GrUniformAggregator.h", "$_src/gpu/GrUniformDataManager.cpp", "$_src/gpu/GrUniformDataManager.h", "$_src/gpu/GrUserStencilSettings.h", diff --git a/include/private/GrTypesPriv.h b/include/private/GrTypesPriv.h index 30bf996a44..0af6cc451e 100644 --- a/include/private/GrTypesPriv.h +++ b/include/private/GrTypesPriv.h @@ -284,7 +284,7 @@ static inline GrQuadAAFlags SkToGrQuadAAFlags(unsigned flags) { /** * Types of shader-language-specific boxed variables we can create. */ -enum GrSLType { +enum GrSLType : unsigned { kVoid_GrSLType, kBool_GrSLType, kBool2_GrSLType, @@ -560,6 +560,67 @@ static constexpr int GrSLTypeVecLength(GrSLType type) { SkUNREACHABLE; } +/** + * Get dimensions of matrix types or {0, 0} if not a matrix. Note scalars and vectors are not + * considered matrices by this function. + */ +static constexpr SkISize GrSLTypeMatrixDims(GrSLType type) { + switch (type) { + case kFloat2x2_GrSLType: return {2, 2}; + case kFloat3x3_GrSLType: return {3, 3}; + case kFloat4x4_GrSLType: return {4, 4}; + case kHalf2x2_GrSLType: return {2, 2}; + case kHalf3x3_GrSLType: return {3, 3}; + case kHalf4x4_GrSLType: return {4, 4}; + + case kVoid_GrSLType: + case kFloat_GrSLType: + case kHalf_GrSLType: + case kBool_GrSLType: + case kByte_GrSLType: + case kUByte_GrSLType: + case kShort_GrSLType: + case kUShort_GrSLType: + case kInt_GrSLType: + case kUint_GrSLType: + case kFloat2_GrSLType: + case kHalf2_GrSLType: + case kBool2_GrSLType: + case kByte2_GrSLType: + case kUByte2_GrSLType: + case kShort2_GrSLType: + case kUShort2_GrSLType: + case kInt2_GrSLType: + case kUint2_GrSLType: + case kFloat3_GrSLType: + case kHalf3_GrSLType: + case kBool3_GrSLType: + case kByte3_GrSLType: + case kUByte3_GrSLType: + case kShort3_GrSLType: + case kUShort3_GrSLType: + case kInt3_GrSLType: + case kUint3_GrSLType: + case kFloat4_GrSLType: + case kHalf4_GrSLType: + case kBool4_GrSLType: + case kByte4_GrSLType: + case kUByte4_GrSLType: + case kShort4_GrSLType: + case kUShort4_GrSLType: + case kInt4_GrSLType: + case kUint4_GrSLType: + case kTexture2DSampler_GrSLType: + case kTextureExternalSampler_GrSLType: + case kTexture2DRectSampler_GrSLType: + case kTexture2D_GrSLType: + case kSampler_GrSLType: + case kInput_GrSLType: + return {0, 0}; + } + SkUNREACHABLE; +} + static inline GrSLType GrSLCombinedSamplerTypeForTextureType(GrTextureType type) { switch (type) { case GrTextureType::k2D: diff --git a/src/gpu/GrFragmentProcessor.h b/src/gpu/GrFragmentProcessor.h index 89dbb839f0..b11cb255a7 100644 --- a/src/gpu/GrFragmentProcessor.h +++ b/src/gpu/GrFragmentProcessor.h @@ -11,6 +11,7 @@ #include "include/private/SkSLSampleUsage.h" #include "include/private/SkSLString.h" #include "src/gpu/GrProcessor.h" +#include "src/gpu/GrUniformAggregator.h" #include "src/gpu/glsl/GrGLSLUniformHandler.h" #include @@ -372,7 +373,7 @@ protected: } explicit GrFragmentProcessor(const GrFragmentProcessor& src) - : INHERITED(src.classID()), fFlags(src.fFlags) { + : INHERITED(src), fFlags(src.fFlags) { this->cloneAndRegisterAllChildProcessors(src); } @@ -506,6 +507,7 @@ public: stages. @param fragBuilder Interface used to emit code in the shaders. + @param uniforms Used to get names of uniforms added by GrProcessor::uniforms(). 
@param uniformHandler Interface used for accessing information about our uniforms @param caps The capabilities of the GPU which will render this FP @param fp The processor that generated this program stage. @@ -523,6 +525,7 @@ public: */ struct EmitArgs { EmitArgs(GrGLSLFPFragmentBuilder* fragBuilder, + GrUniformAggregator::ProcessorUniforms uniforms, GrGLSLUniformHandler* uniformHandler, const GrShaderCaps* caps, const GrFragmentProcessor& fp, @@ -530,6 +533,7 @@ public: const char* destColor, const char* sampleCoord) : fFragBuilder(fragBuilder) + , fUniforms(std::move(uniforms)) , fUniformHandler(uniformHandler) , fShaderCaps(caps) , fFp(fp) @@ -537,6 +541,7 @@ public: , fDestColor(destColor) , fSampleCoord(sampleCoord) {} GrGLSLFPFragmentBuilder* fFragBuilder; + GrUniformAggregator::ProcessorUniforms fUniforms; GrGLSLUniformHandler* fUniformHandler; const GrShaderCaps* fShaderCaps; const GrFragmentProcessor& fFp; diff --git a/src/gpu/GrGeometryProcessor.h b/src/gpu/GrGeometryProcessor.h index 31667b4b1a..19644163a3 100644 --- a/src/gpu/GrGeometryProcessor.h +++ b/src/gpu/GrGeometryProcessor.h @@ -14,6 +14,7 @@ #include "src/gpu/GrShaderCaps.h" #include "src/gpu/GrShaderVar.h" #include "src/gpu/GrSwizzle.h" +#include "src/gpu/GrUniformAggregator.h" #include "src/gpu/glsl/GrGLSLProgramDataManager.h" #include "src/gpu/glsl/GrGLSLUniformHandler.h" #include "src/gpu/glsl/GrGLSLVarying.h" @@ -209,8 +210,8 @@ public: for (int i = 0; i < attrCount; ++i) { const Attribute& attr = attrs[i]; b->appendComment(attr.isInitialized() ? attr.name() : "unusedAttr"); - b->addBits(8, attr.isInitialized() ? attr.cpuType() : 0xff, "attrType"); - b->addBits(8, attr.isInitialized() ? attr.gpuType() : 0xff, "attrGpuType"); + b->addBits(8, attr.isInitialized() ? (int)attr.cpuType() : 0xff, "attrType"); + b->addBits(8, attr.isInitialized() ? 
(int)attr.gpuType() : 0xff, "attrGpuType"); } }; b->add32(fVertexAttributes.fRawCount, "numVertexAttributes"); @@ -261,6 +262,8 @@ protected: inline static const TextureSampler& IthTextureSampler(int i); private: + GrGeometryProcessor(const GrGeometryProcessor&) = delete; + virtual const TextureSampler& onTextureSampler(int) const { return IthTextureSampler(0); } GrShaderFlags fShaders = kVertex_GrShaderFlag | kFragment_GrShaderFlag; @@ -292,6 +295,7 @@ public: EmitArgs(GrGLSLVertexBuilder* vertBuilder, GrGLSLFPFragmentBuilder* fragBuilder, GrGLSLVaryingHandler* varyingHandler, + GrUniformAggregator::ProcessorUniforms uniforms, GrGLSLUniformHandler* uniformHandler, const GrShaderCaps* caps, const GrGeometryProcessor& geomProc, @@ -301,6 +305,7 @@ public: : fVertBuilder(vertBuilder) , fFragBuilder(fragBuilder) , fVaryingHandler(varyingHandler) + , fUniforms(std::move(uniforms)) , fUniformHandler(uniformHandler) , fShaderCaps(caps) , fGeomProc(geomProc) @@ -310,6 +315,7 @@ public: GrGLSLVertexBuilder* fVertBuilder; GrGLSLFPFragmentBuilder* fFragBuilder; GrGLSLVaryingHandler* fVaryingHandler; + GrUniformAggregator::ProcessorUniforms fUniforms; GrGLSLUniformHandler* fUniformHandler; const GrShaderCaps* fShaderCaps; const GrGeometryProcessor& fGeomProc; diff --git a/src/gpu/GrOpFlushState.cpp b/src/gpu/GrOpFlushState.cpp index 4ce2094596..089d89bb45 100644 --- a/src/gpu/GrOpFlushState.cpp +++ b/src/gpu/GrOpFlushState.cpp @@ -12,6 +12,7 @@ #include "src/gpu/GrDataUtils.h" #include "src/gpu/GrDirectContextPriv.h" #include "src/gpu/GrDrawOpAtlas.h" +#include "src/gpu/GrGeometryProcessor.h" #include "src/gpu/GrGpu.h" #include "src/gpu/GrImageInfo.h" #include "src/gpu/GrProgramInfo.h" diff --git a/src/gpu/GrProcessor.h b/src/gpu/GrProcessor.h index 104537bd4d..f71f2b274f 100644 --- a/src/gpu/GrProcessor.h +++ b/src/gpu/GrProcessor.h @@ -9,6 +9,7 @@ #define GrProcessor_DEFINED #include "include/core/SkMath.h" +#include "include/core/SkSpan.h" #include "include/core/SkString.h" #include "src/gpu/GrColor.h" #include "src/gpu/GrGpuBuffer.h" @@ -144,12 +145,139 @@ public: ClassID classID() const { return fClassID; } + /** + * Describes a uniform. Uniforms consist of: + * type: The type of the values in the shader + * count: Number of elements of 'type' in the array or GrShaderVar::kNonArray if not an + * array. + * offset: byte offset of the data within the GrProcessor class (no relation to uniform + * buffer offset). + * ctype: specifies the way the data at the 'offset' is represented. See CType enum + * comments. + * visibility: specifies in which shader stage(s) the uniform is declared. + */ + class Uniform { + public: + enum class CType : unsigned { + // Any float/half, vector of floats/half, or matrices of floats/halfs are a tightly + // packed array of floats. Similarly, any bool/shorts/ints are a tightly packed array + // of int32_t. 
+ kDefault, + // Can be used with kFloat3x3 or kHalf3x3 + kSkMatrix, + + kLast = kSkMatrix + }; + static constexpr int kCTypeCount = static_cast(CType::kLast) + 1; + + constexpr Uniform() + : fType (static_cast(kVoid_GrSLType)) + , fCount (static_cast(GrShaderVar::kNonArray)) + , fVisibility(static_cast(GrShaderFlags::kNone_GrShaderFlags)) + , fCType (static_cast(CType::kDefault)) + , fOffset (0) {} + + constexpr Uniform(GrSLType type, + ptrdiff_t offset, + GrShaderFlags visibility = kFragment_GrShaderFlag, + CType ctype = CType::kDefault) + : Uniform(type, GrShaderVar::kNonArray, offset, visibility, ctype) {} + + constexpr Uniform(GrSLType type, + int arrayCount, + size_t offset, + GrShaderFlags visibility = kFragment_GrShaderFlag, + CType ctype = CType::kDefault) + : fType (static_cast(type )) + , fCount (static_cast(arrayCount)) + , fVisibility(static_cast(visibility)) + , fCType (static_cast(ctype )) + , fOffset (static_cast(offset )) { + SkASSERT(CTypeCompatibleWithType(ctype, type)); + + SkASSERT(this->type() == type ); + SkASSERT(this->count() == arrayCount); + SkASSERT(this->offset() == offset ); + SkASSERT(this->visibility() == visibility); + SkASSERT(this->ctype() == ctype ); + } + + constexpr Uniform(const Uniform&) = default; + + Uniform& operator=(const Uniform&) = default; + + constexpr bool isInitialized() const { return this->type() != kVoid_GrSLType; } + + constexpr GrSLType type () const { return static_cast (fType); } + constexpr int count () const { return static_cast (fCount); } + constexpr CType ctype () const { return static_cast (fCType); } + constexpr size_t offset () const { return static_cast(fOffset); } + constexpr GrShaderFlags visibility() const { + return static_cast(fVisibility); + } + + static constexpr bool CTypeCompatibleWithType(CType, GrSLType); + + private: + unsigned fType : 6; + unsigned fCount : 8; + unsigned fVisibility : 4; + unsigned fCType : 1; + unsigned fOffset : 32 - (6 + 8 + 4 + 1); + + static_assert(kGrSLTypeCount <= (1 << 6)); + static_assert(kCTypeCount <= (1 << 1)); + }; + + /** Returns the array of uniforms inserted into the program by this processor. */ + SkSpan uniforms() const { return fUniforms; } + + template const T* uniformData(size_t index) const { + SkASSERT(fUniforms[index].isInitialized()); + return SkTAddOffset(this, fUniforms[index].offset()); + } + protected: GrProcessor(ClassID classID) : fClassID(classID) {} - GrProcessor(const GrProcessor&) = delete; + GrProcessor(const GrProcessor&) = default; GrProcessor& operator=(const GrProcessor&) = delete; + /** + * Specifies the uniforms used by this processor. Should be called when the processor is made + * (i.e. constructor or factory function). Any uniforms with type void are ignored. This allows + * a processor to have a contiguous array of data member uniforms where some are conditionally + * initialized. + */ + void setUniforms(SkSpan uniforms) { fUniforms = uniforms; } + const ClassID fClassID; + +private: + SkSpan fUniforms; }; +constexpr bool GrProcessor::Uniform::CTypeCompatibleWithType(CType ctype, GrSLType type) { + switch (ctype) { + case CType::kDefault: + return true; + case CType::kSkMatrix: + return type == kHalf3x3_GrSLType || type == kFloat3x3_GrSLType; + } + SkUNREACHABLE; +} + +/** + * GCC, and clang sometimes but less often for reason, warns if offset_of or__builtin_offsetof is + * used on non-standard layout classes. This is because it is not required to be supported by the + * compiler (conditionally supported by c++17). 
clang, GCC, and MSVC all support it, however. + */ +#if defined(__GNUC__) || defined(__clang__) +# define GR_BEGIN_UNIFORM_DEFINITIONS _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Winvalid-offsetof\"") +# define GR_END_UNIFORM_DEFINITIONS _Pragma("GCC diagnostic pop") +#else +# define GR_BEGIN_UNIFORM_DEFINITIONS +# define GR_END_UNIFORM_DEFINITIONS +#endif + #endif diff --git a/src/gpu/GrProgramInfo.cpp b/src/gpu/GrProgramInfo.cpp index 091d1133ee..de394aab20 100644 --- a/src/gpu/GrProgramInfo.cpp +++ b/src/gpu/GrProgramInfo.cpp @@ -20,6 +20,25 @@ GrStencilSettings GrProgramInfo::nonGLStencilSettings() const { return stencil; } +static void visit_fp_tree(const GrFragmentProcessor& fp, + const std::function& f) { + f(fp); + for (int i = 0; i < fp.numChildProcessors(); ++i) { + if (const GrFragmentProcessor* child = fp.childProcessor(i)) { + visit_fp_tree(*child, f); + } + } +} + +void GrProgramInfo::visitProcessors(const std::function& f) const { + f(*fGeomProc); + + for (int i = 0; i < fPipeline->numFragmentProcessors(); ++i) { + visit_fp_tree(fPipeline->getFragmentProcessor(i), f); + } + f(fPipeline->getXferProcessor()); +} + #ifdef SK_DEBUG #include "src/gpu/GrTexture.h" diff --git a/src/gpu/GrProgramInfo.h b/src/gpu/GrProgramInfo.h index 9f1f602b56..29d97d5545 100644 --- a/src/gpu/GrProgramInfo.h +++ b/src/gpu/GrProgramInfo.h @@ -93,6 +93,9 @@ public: // to call the visitor on its own primProc proxies. void visitFPProxies(const GrVisitProxyFunc& func) const { fPipeline->visitProxies(func); } + /** Visits the GP, then each root FP in a pre-order traversal, and finally the XP. */ + void visitProcessors(const std::function&) const; + #ifdef SK_DEBUG void validate(bool flushTime) const; void checkAllInstantiated() const; diff --git a/src/gpu/GrSPIRVUniformHandler.cpp b/src/gpu/GrSPIRVUniformHandler.cpp index f073496c71..90505e6e3a 100644 --- a/src/gpu/GrSPIRVUniformHandler.cpp +++ b/src/gpu/GrSPIRVUniformHandler.cpp @@ -293,7 +293,37 @@ GrSwizzle GrSPIRVUniformHandler::samplerSwizzle(GrGLSLUniformHandler::SamplerHan return fSamplerSwizzles[handle.toIndex()]; } -void GrSPIRVUniformHandler::appendUniformDecls(GrShaderFlags visibility, SkString* out) const { +GrUniformDataManager::ProgramUniforms GrSPIRVUniformHandler::getNewProgramUniforms( + const GrUniformAggregator& aggregator) { + GrUniformDataManager::ProgramUniforms result; + result.reserve(aggregator.numProcessors()); + for (int p = 0; p < aggregator.numProcessors(); ++p) { + GrUniformDataManager::ProcessorUniforms uniforms; + auto records = aggregator.processorRecords(p); + uniforms.reserve(records.size()); + for (const GrUniformAggregator::Record& record : records) { + const GrProcessor::Uniform& u = record.uniform(); + uint32_t offset = get_ubo_offset(&fCurrentUBOOffset, u.type(), u.count()); + uniforms.push_back({record.indexInProcessor, u.type(), u.count(), offset}); + + // Add to fNewUniforms so that these get declared. 
+ SPIRVUniformInfo& info = fNewUniforms.push_back(); + GrShaderVar var(record.name, u.type(), u.count()); + SkString qualifier = SkStringPrintf("offset = %d", offset); + var.addLayoutQualifier(qualifier.c_str()); + info.fUBOOffset = offset; + info.fVariable = var; + info.fVisibility = u.visibility(); + info.fOwner = nullptr; + } + result.push_back(std::move(uniforms)); + } + return result; +} + +void GrSPIRVUniformHandler::appendUniformDecls(const GrUniformAggregator&, + GrShaderFlags visibility, + SkString* out) const { auto textures = fTextures.items().begin(); for (const SPIRVUniformInfo& sampler : fSamplers.items()) { if (sampler.fVisibility & visibility) { @@ -311,6 +341,12 @@ void GrSPIRVUniformHandler::appendUniformDecls(GrShaderFlags visibility, SkStrin uniformsString.append(";\n"); } } + for (const UniformInfo& uniform : fNewUniforms.items()) { + if (uniform.fVisibility & visibility) { + uniform.fVariable.appendDecl(fProgramBuilder->shaderCaps(), &uniformsString); + uniformsString.append(";\n"); + } + } if (!uniformsString.isEmpty()) { out->appendf("layout (set = %d, binding = %d) uniform UniformBuffer\n{\n", kUniformDescriptorSet, kUniformBinding); diff --git a/src/gpu/GrSPIRVUniformHandler.h b/src/gpu/GrSPIRVUniformHandler.h index 5cb1aaa911..44475c5e1e 100644 --- a/src/gpu/GrSPIRVUniformHandler.h +++ b/src/gpu/GrSPIRVUniformHandler.h @@ -9,6 +9,7 @@ #define GrSPIRVUniformHandler_DEFINED #include "src/core/SkTBlockList.h" +#include "src/gpu/GrUniformDataManager.h" #include "src/gpu/glsl/GrGLSLUniformHandler.h" /* @@ -46,6 +47,14 @@ public: return fUniforms.item(idx); } + /** + * Call after all legacy style uniforms have been added to assign offsets to new style uniforms + * and create the data structure needed to transfer new style uniforms to GrUniformDataManager. + * This must be called before appendUniformDecls() in order to ensure new style uniforms get + * declared. It must be called only once. + */ + GrUniformDataManager::ProgramUniforms getNewProgramUniforms(const GrUniformAggregator&); + private: explicit GrSPIRVUniformHandler(GrGLSLProgramBuilder* program); @@ -53,7 +62,9 @@ private: const char* name, const GrShaderCaps*) override; const char* samplerVariable(SamplerHandle handle) const override; GrSwizzle samplerSwizzle(SamplerHandle handle) const override; - void appendUniformDecls(GrShaderFlags visibility, SkString*) const override; + void appendUniformDecls(const GrUniformAggregator&, + GrShaderFlags visibility, + SkString*) const override; UniformHandle internalAddUniformArray(const GrFragmentProcessor* owner, uint32_t visibility, GrSLType type, @@ -63,6 +74,7 @@ private: const char** outName) override; UniformInfoArray fUniforms; + UniformInfoArray fNewUniforms; UniformInfoArray fSamplers; UniformInfoArray fTextures; SkTArray fSamplerSwizzles; diff --git a/src/gpu/GrUniformAggregator.cpp b/src/gpu/GrUniformAggregator.cpp new file mode 100644 index 0000000000..8e2d6bf971 --- /dev/null +++ b/src/gpu/GrUniformAggregator.cpp @@ -0,0 +1,63 @@ +/* + * Copyright 2021 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#include "src/gpu/GrUniformAggregator.h" + +using ProcessorUniforms = GrUniformAggregator::ProcessorUniforms; + +ProcessorUniforms GrUniformAggregator::addUniforms(const GrProcessor& p, + const SkString& mangleSuffix) { + Processor processor{ + &p, + fUniforms.size(), + fUniforms.size(), + }; + for (size_t i = 0; i < p.uniforms().size(); ++i) { + if (!p.uniforms()[i].isInitialized()) { + continue; + } + // We give every uniform an initial name so it always can be validly declared. When code is + // emitted the processor can give it a more meaningful name. The actual name doesn't matter, + // other than for readability. + SkString unusedName = SkStringPrintf("default_%zu%s", i, mangleSuffix.c_str()); + fUniforms.push_back(Record{std::move(unusedName), &p, i}); + ++processor.end; + } + fProcessors.push_back(processor); + return ProcessorUniforms(p, mangleSuffix, this); +} + +////////////////////////////////////////////////////////////////////////////// + +ProcessorUniforms::ProcessorUniforms(const GrProcessor& p, + const SkString& mangleSuffix, + GrUniformAggregator* aggregator) + : fAgg(aggregator), fMangleSuffix(mangleSuffix) { + for (size_t i = 0; i < fAgg->fProcessors.size(); ++i) { + if (fAgg->fProcessors[i].processor == &p) { + fBegin = fAgg->fProcessors[i].begin; + fEnd = fAgg->fProcessors[i].end; + return; + } + } +} + +const char* ProcessorUniforms::getUniformName(size_t index, const char* newBaseName) const { + for (size_t i = fBegin; i < fEnd; ++i) { + if (fAgg->fUniforms[i].indexInProcessor == index) { + GrUniformAggregator::Record& r = fAgg->fUniforms[i]; + if (newBaseName) { + SkString mangledName = SkStringPrintf("%s%s", newBaseName, fMangleSuffix.c_str()); + r.name = mangledName; + } + return r.name.c_str(); + } else if (fAgg->fUniforms[i].indexInProcessor > index) { + break; + } + } + return nullptr; +} diff --git a/src/gpu/GrUniformAggregator.h b/src/gpu/GrUniformAggregator.h new file mode 100644 index 0000000000..af4f7fd8b7 --- /dev/null +++ b/src/gpu/GrUniformAggregator.h @@ -0,0 +1,106 @@ +/* + * Copyright 2021 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef GrUniformAggregator_DEFINED +#define GrUniformAggregator_DEFINED + +#include "include/core/SkString.h" +#include "include/private/SkChecksum.h" +#include "src/gpu/GrProcessor.h" + +#include + +/** Collects the uniforms from various processors comprising the shaders of a pipeline/program. */ +class GrUniformAggregator { +public: + class ProcessorUniforms { + public: + ProcessorUniforms(ProcessorUniforms&&) = default; + ProcessorUniforms& operator=(ProcessorUniforms&&) = default; + + /** + * Finds a uniform name by index. The uniform initially has a generic name. It can + * optionally be given a descriptive name via the newBaseName param. However, the caller + * must use the returned name because even if a name is passed the final uniform name will + * be mangled to be unique. 
+ */ + const char* getUniformName(size_t index, const char* newBaseName = nullptr) const; + + private: + ProcessorUniforms(const GrProcessor& p, + const SkString& mangleSuffix, + GrUniformAggregator* aggregator); + + ProcessorUniforms(const ProcessorUniforms&) = delete; + ProcessorUniforms& operator=(const ProcessorUniforms&) = delete; + + GrUniformAggregator* fAgg; + + SkString fMangleSuffix; + + size_t fBegin = 0; + size_t fEnd = 0; + + friend class GrUniformAggregator; + }; + + struct Record { + SkString name; + const GrProcessor* processor = nullptr; + size_t indexInProcessor = -1; + + const GrProcessor::Uniform& uniform() const { + return processor->uniforms()[indexInProcessor]; + } + }; + + GrUniformAggregator() = default; + + /** + * Aggregates the uniforms for a processor. This must be called for all processors in a + * program and must be called in this order: GP, FP0-T, FP1-T, ... XP. FPi-T is a pre-order + * traversal of the ith FP in the GrPipeline. + */ + ProcessorUniforms addUniforms(const GrProcessor&, const SkString& mangleSuffix); + + /** + * Iterable range of all uniform Records across all processors added. + */ + SkSpan records() const { + return SkSpan(fUniforms.data(), fUniforms.size()); + } + + /** + * Iterable range of Records for a given processor index. + */ + SkSpan processorRecords(size_t processorIndex) const { + SkASSERT(processorIndex < fProcessors.size()); + size_t size = fProcessors[processorIndex].end - fProcessors[processorIndex].begin; + return SkSpan(fUniforms.data() + fProcessors[processorIndex].begin, size); + } + + int uniformCount() const { return fUniforms.size(); } + + /** + * The number of processors whose uniforms have been added to the aggregator, including + * processors that had no valid uniforms. + */ + int numProcessors() const { return fProcessors.size(); } + +private: + struct Processor { + const GrProcessor* processor; + size_t begin; // index of first uniform owned by processor in fUniforms. + size_t end; // index of last uniform + 1 owned by processor in fUniforms. + }; + std::vector fProcessors; + using Records = std::vector; + + Records fUniforms; +}; + +#endif diff --git a/src/gpu/GrUniformDataManager.cpp b/src/gpu/GrUniformDataManager.cpp index 69842d0852..859ff1dbda 100644 --- a/src/gpu/GrUniformDataManager.cpp +++ b/src/gpu/GrUniformDataManager.cpp @@ -7,18 +7,295 @@ #include "src/gpu/GrUniformDataManager.h" +#include "src/gpu/GrProgramInfo.h" #include "src/gpu/GrShaderVar.h" // ensure that these types are the sizes the uniform data is expecting static_assert(sizeof(int32_t) == 4); static_assert(sizeof(float) == 4); -GrUniformDataManager::GrUniformDataManager(uint32_t uniformCount, uint32_t uniformSize) - : fUniformSize(uniformSize) - , fUniformsDirty(false) { +////////////////////////////////////////////////////////////////////////////// + +GrUniformDataManager::UniformManager::UniformManager(ProgramUniforms uniforms, Layout layout) + : fUniforms(std::move(uniforms)), fLayout(layout) {} + +template static constexpr size_t tight_vec_size(int vecLength) { + return sizeof(BaseType) * vecLength; +} + +/** + * From Section 7.6.2.2 "Standard Uniform Block Layout": + * 1. If the member is a scalar consuming N basic machine units, the base alignment is N. + * 2. If the member is a two- or four-component vector with components consuming N basic machine + * units, the base alignment is 2N or 4N, respectively. + * 3. If the member is a three-component vector with components consuming N + * basic machine units, the base alignment is 4N. 
+ * 4. If the member is an array of scalars or vectors, the base alignment and array + * stride are set to match the base alignment of a single array element, according + * to rules (1), (2), and (3), and rounded up to the base alignment of a vec4. The + * array may have padding at the end; the base offset of the member following + * the array is rounded up to the next multiple of the base alignment. + * 5. If the member is a column-major matrix with C columns and R rows, the + * matrix is stored identically to an array of C column vectors with R components each, + * according to rule (4). + * 6. If the member is an array of S column-major matrices with C columns and + * R rows, the matrix is stored identically to a row of S × C column vectors + * with R components each, according to rule (4). + * 7. If the member is a row-major matrix with C columns and R rows, the matrix + * is stored identically to an array of R row vectors with C components each, + * according to rule (4). + * 8. If the member is an array of S row-major matrices with C columns and R + * rows, the matrix is stored identically to a row of S × R row vectors with C + * components each, according to rule (4). + * 9. If the member is a structure, the base alignment of the structure is N, where + * N is the largest base alignment value of any of its members, and rounded + * up to the base alignment of a vec4. The individual members of this substructure are then + * assigned offsets by applying this set of rules recursively, + * where the base offset of the first member of the sub-structure is equal to the + * aligned offset of the structure. The structure may have padding at the end; + * the base offset of the member following the sub-structure is rounded up to + * the next multiple of the base alignment of the structure. + * 10. If the member is an array of S structures, the S elements of the array are laid + * out in order, according to rule (9). + */ +template +struct Rules140 { + /** + * For an array of scalars or vectors this returns the stride between array elements. For + * matrices or arrays of matrices this returns the stride between columns of the matrix. Note + * that for single (non-array) scalars or vectors we don't require a stride. + */ + static constexpr size_t Stride(int count) { + SkASSERT(count >= 1 || count == GrShaderVar::kNonArray); + static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4); + static_assert(Cols >= 1 && Cols <= 4); + if (Cols != 1) { + // This is a matrix or array of matrices. We return the stride between columns. + SkASSERT(RowsOrVecLength > 1); + return Rules140::Stride(1); + } + if (count == 0) { + // Stride doesn't matter for a non-array. + return 0; + } + + // Rule 4. + + // Alignment of vec4 by Rule 2. + constexpr size_t kVec4Alignment = tight_vec_size(4); + // Get alignment of a single vector of BaseType by Rule 1, 2, or 3 + int n = RowsOrVecLength == 3 ? 4 : RowsOrVecLength; + size_t kElementAlignment = tight_vec_size(n); + // Round kElementAlignment up to multiple of kVec4Alignment. + size_t m = (kElementAlignment + kVec4Alignment - 1)/kVec4Alignment; + return m*kVec4Alignment; + } +}; + +/** + * When using the std430 storage layout, shader storage blocks will be laid out in buffer storage + * identically to uniform and shader storage blocks using the std140 layout, except that the base + * alignment and stride of arrays of scalars and vectors in rule 4 and of structures in rule 9 are + * not rounded up a multiple of the base alignment of a vec4. 
+ */ +template +struct Rules430 { + static constexpr size_t Stride(int count) { + SkASSERT(count >= 1 || count == GrShaderVar::kNonArray); + static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4); + static_assert(Cols >= 1 && Cols <= 4); + + if (Cols != 1) { + // This is a matrix or array of matrices. We return the stride between columns. + SkASSERT(RowsOrVecLength > 1); + return Rules430::Stride(1); + } + if (count == 0) { + // Stride doesn't matter for a non-array. + return 0; + } + // Rule 4 without the round up to a multiple of align-of vec4. + return tight_vec_size(RowsOrVecLength == 3 ? 4 : RowsOrVecLength); + } +}; + +// The strides used here were derived from the rules we've imposed on ourselves in +// GrMtlPipelineStateDataManger. Everything is tight except 3-component which have the stride of +// their 4-component equivalents. +template +struct RulesMetal { + static constexpr size_t Stride(int count) { + SkASSERT(count >= 1 || count == GrShaderVar::kNonArray); + static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4); + static_assert(Cols >= 1 && Cols <= 4); + if (Cols != 1) { + // This is a matrix or array of matrices. We return the stride between columns. + SkASSERT(RowsOrVecLength > 1); + return RulesMetal::Stride(1); + } + if (count == 0) { + // Stride doesn't matter for a non-array. + return 0; + } + return tight_vec_size(RowsOrVecLength == 3 ? 4 : RowsOrVecLength); + } +}; + +template
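
The std140/std430 stride rules quoted above are easiest to see with a small worked example. The sketch below is illustrative only and is not part of the CL; it assumes the constexpr helpers GrSLTypeVecLength() and GrSLTypeMatrixDims() from GrTypesPriv.h, treats the data as float-sized (as the CType comment says half data is uploaded), and the helper name std140_array_stride is hypothetical.

    // Rule 4 (std140): an array element's stride is its tight size, with 3-component
    // vectors padded to 4 components, then rounded up to the alignment of a vec4.
    // Rule 5 stores each matrix column like an array element.
    static constexpr size_t std140_array_stride(GrSLType type) {
        constexpr size_t kVec4Alignment = 4 * sizeof(float);
        SkISize dims = GrSLTypeMatrixDims(type);
        int vecLength = (dims.width() == 0) ? GrSLTypeVecLength(type) : dims.height();
        size_t tight = sizeof(float) * (vecLength == 3 ? 4 : vecLength);
        return ((tight + kVec4Alignment - 1) / kVec4Alignment) * kVec4Alignment;
    }

    // Under std140 all of these stride by 16 bytes; under the std430 rules (Rules430)
    // the final round-up to vec4 alignment is dropped, so a float[] strides by 4.
    static_assert(std140_array_stride(kFloat_GrSLType)    == 16);
    static_assert(std140_array_stride(kFloat3_GrSLType)   == 16);
    static_assert(std140_array_stride(kFloat2x2_GrSLType) == 16);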
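
Stepping back, here is a minimal sketch of how a processor could describe its uniform data with the new GrProcessor::Uniform metadata and setUniforms(). Everything in it is hypothetical (the processor, its fields, and the table name); it only illustrates the pattern the GR_BEGIN/END_UNIFORM_DEFINITIONS macros exist to support, since member offsets are taken with offsetof on a non-standard-layout class.

    // Hypothetical processor; the required virtual overrides (name(), clone(),
    // onAddToKey(), onMakeProgramImpl(), onIsEqual()) are omitted for brevity.
    class HypotheticalColorMatrixFP : public GrFragmentProcessor {
    public:
        HypotheticalColorMatrixFP(const SkPMColor4f& color, const SkMatrix& matrix);

        // Public in this sketch only so offsetof() can be used from namespace scope.
        SkPMColor4f fColor;   // uploaded as a half4
        SkMatrix    fMatrix;  // uploaded as a float3x3 using CType::kSkMatrix
    };

    GR_BEGIN_UNIFORM_DEFINITIONS
    static constexpr GrProcessor::Uniform kHypotheticalUniforms[] = {
            {kHalf4_GrSLType,    offsetof(HypotheticalColorMatrixFP, fColor)},
            {kFloat3x3_GrSLType, offsetof(HypotheticalColorMatrixFP, fMatrix),
             kFragment_GrShaderFlag, GrProcessor::Uniform::CType::kSkMatrix},
    };
    GR_END_UNIFORM_DEFINITIONS

    HypotheticalColorMatrixFP::HypotheticalColorMatrixFP(const SkPMColor4f& color,
                                                         const SkMatrix& matrix)
            : GrFragmentProcessor(kTestFP_ClassID, kNone_OptimizationFlags)
            , fColor(color)
            , fMatrix(matrix) {
        // The offsets describe this object's data members, not uniform-buffer offsets;
        // the uniform handler / data manager computes UBO offsets separately.
        this->setUniforms({kHypotheticalUniforms, SK_ARRAY_COUNT(kHypotheticalUniforms)});
    }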
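
Finally, a sketch of how the two ends could meet at program-build time: the aggregator is populated in the GP, FP pre-order, XP order that GrProgramInfo::visitProcessors() walks, and a processor's emit function later asks its ProcessorUniforms for the (optionally renamed, always mangled) uniform name. This assumes visitProcessors() takes a std::function over const GrProcessor&, and the surrounding variables (programInfo, args, fragBuilder) are assumed context, not new API.

    GrUniformAggregator aggregator;
    std::vector<GrUniformAggregator::ProcessorUniforms> processorUniforms;
    int index = 0;
    programInfo.visitProcessors([&](const GrProcessor& p) {
        SkString suffix = SkStringPrintf("_%d", index++);
        // Each ProcessorUniforms would be handed to the matching processor's EmitArgs.
        processorUniforms.push_back(aggregator.addUniforms(p, suffix));
    });

    // Later, inside a processor's emitCode(), where index 0 refers to the first entry of
    // the Uniform span that the processor registered via setUniforms():
    const char* colorName = args.fUniforms.getUniformName(0, "color");
    fragBuilder->codeAppendf("half4 tint = %s;", colorName);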