diff --git a/include/UnityInstancingFlexibleArraySize.h b/include/UnityInstancingFlexibleArraySize.h index 6a6dd00..70fb308 100644 --- a/include/UnityInstancingFlexibleArraySize.h +++ b/include/UnityInstancingFlexibleArraySize.h @@ -4,8 +4,10 @@ #include #define UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "UNITY_RUNTIME_INSTANCING_ARRAY_SIZE" +#define UNITY_PRETRANSFORM_CONSTANT_NAME "UnityDisplayOrientationPreTransform" const unsigned int kArraySizeConstantID = 0; +const unsigned int kPreTransformConstantID = 1; // TODO: share with Runtime/GfxDevice/InstancingUtilities.h inline bool IsUnityInstancingConstantBufferName(const char* cbName) @@ -13,3 +15,9 @@ inline bool IsUnityInstancingConstantBufferName(const char* cbName) static const char kInstancedCbNamePrefix[] = "UnityInstancing"; return strncmp(cbName, kInstancedCbNamePrefix, sizeof(kInstancedCbNamePrefix) - 1) == 0; } + +inline bool IsPreTransformConstantBufferName(const char* cbName) +{ + static const char kPreTransformCbNamePrefix[] = "UnityDisplayOrientationPreTransformData"; + return strncmp(cbName, kPreTransformCbNamePrefix, sizeof(kPreTransformCbNamePrefix) - 1) == 0; +} diff --git a/include/hlslcc.h b/include/hlslcc.h index a9225e2..dc7853a 100644 --- a/include/hlslcc.h +++ b/include/hlslcc.h @@ -198,6 +198,11 @@ public: } } + uint32_t PeekFirstFreeSlot() const + { + return m_FreeSlots.back(); + } + uint32_t SaveTotalShaderStageAllocationsCount() { m_ShaderStageAllocations = m_Allocations.size(); @@ -216,13 +221,37 @@ private: //carry over any information needed about a different shader stage //in order to construct valid GLSL shader combinations. + //Using GLSLCrossDependencyData is optional. However some shader //combinations may show link failures, or runtime errors. 
class GLSLCrossDependencyData { public: + + struct GLSLBufferBindPointInfo + { + uint32_t slot; + bool known; + }; + // A container for a single Vulkan resource binding ( pair) - typedef std::pair VulkanResourceBinding; + struct VulkanResourceBinding + { + uint32_t set; + uint32_t binding; + }; + + enum GLSLBufferType + { + BufferType_ReadWrite, + BufferType_Constant, + BufferType_SSBO, + BufferType_Texture, + BufferType_UBO, + + BufferType_Count, + BufferType_Generic = BufferType_ReadWrite + }; private: //Required if PixelInterpDependency is true @@ -240,6 +269,13 @@ private: VulkanResourceBindings m_VulkanResourceBindings; uint32_t m_NextAvailableVulkanResourceBinding[8]; // one per set. + typedef std::map GLSLResouceBindings; + +public: + GLSLResouceBindings m_GLSLResourceBindings; + uint32_t m_NextAvailableGLSLResourceBinding[BufferType_Count]; // UAV, Constant and Buffers have separate binding ranges + uint32_t m_StructuredBufferBindPoints[MAX_RESOURCE_BINDINGS]; // for the old style bindings + inline int GetVaryingNamespace(SHADER_TYPE eShaderType, bool isInput) { switch (eShaderType) @@ -284,10 +320,6 @@ private: } } - typedef std::map SpecializationConstantMap; - SpecializationConstantMap m_SpecConstantMap; - uint32_t m_NextSpecID; - public: GLSLCrossDependencyData() : eTessPartitioning(), @@ -297,27 +329,78 @@ public: hasControlPoint(false), hasPatchConstant(false), ui32ProgramStages(0), - m_ExtBlendModes(), - m_NextSpecID(0) + m_ExtBlendModes() { memset(nextAvailableVaryingLocation, 0, sizeof(nextAvailableVaryingLocation)); memset(m_NextAvailableVulkanResourceBinding, 0, sizeof(m_NextAvailableVulkanResourceBinding)); + memset(m_NextAvailableGLSLResourceBinding, 0, sizeof(m_NextAvailableGLSLResourceBinding)); } // Retrieve the location for a varying with a given name. // If the name doesn't already have an allocated location, allocate one // and store it into the map. 
- inline uint32_t GetVaryingLocation(const std::string &name, SHADER_TYPE eShaderType, bool isInput) + inline uint32_t GetVaryingLocation(const std::string &name, SHADER_TYPE eShaderType, bool isInput, bool keepLocation, uint32_t maxSemanticIndex) { int nspace = GetVaryingNamespace(eShaderType, isInput); VaryingLocations::iterator itr = varyingLocationsMap[nspace].find(name); if (itr != varyingLocationsMap[nspace].end()) return itr->second; - uint32_t newKey = nextAvailableVaryingLocation[nspace]; - nextAvailableVaryingLocation[nspace]++; - varyingLocationsMap[nspace].insert(std::make_pair(name, newKey)); - return newKey; + if (keepLocation) + { + // Try to generate consistent varying locations based on the semantic indices in the hlsl source, e.g. "TEXCOORD11" gets assigned to layout(location = 11) + + // Inspect last 2 characters in name + size_t len = name.length(); + + if (len > 1) + { + if (isdigit(static_cast<unsigned char>(name[len - 1]))) + { + uint32_t index = 0; + if (isdigit(static_cast<unsigned char>(name[len - 2]))) + index = atoi(&name[len - 2]); // 2-digits index + else + index = atoi(&name[len - 1]); // 1-digit index + + if (index < 32) // Some platforms only allow 32 varying locations + { + // Check that index is not already used + bool canUseIndex = true; + for (VaryingLocations::iterator it = varyingLocationsMap[nspace].begin(); it != varyingLocationsMap[nspace].end(); ++it) + { + if (it->second == index) + { + canUseIndex = false; + break; + } + } + + if (canUseIndex) + { + varyingLocationsMap[nspace].insert(std::make_pair(name, index)); + return index; + } + } + } + } + + // fallback: pick an unused index (max of already allocated AND of semanticIndices found by SignatureAnalysis) + uint32_t maxIndexAlreadyAssigned = 0; + for (VaryingLocations::iterator it = varyingLocationsMap[nspace].begin(); it != varyingLocationsMap[nspace].end(); ++it) + maxIndexAlreadyAssigned = std::max(maxIndexAlreadyAssigned, it->second); + + uint32_t fallbackIndex = std::max(maxIndexAlreadyAssigned + 1, maxSemanticIndex + 
1); + varyingLocationsMap[nspace].insert(std::make_pair(name, fallbackIndex)); + return fallbackIndex; + } + else + { + uint32_t newKey = nextAvailableVaryingLocation[nspace]; + nextAvailableVaryingLocation[nspace]++; + varyingLocationsMap[nspace].insert(std::make_pair(name, newKey)); + return newKey; + } } // Retrieve the binding for a resource (texture, constant buffer, image) with a given name @@ -326,7 +409,7 @@ public: // If the name contains "hlslcc_set_X_bind_Y", those values (from the first found occurence in the name) // will be used instead, and all occurences of that string will be removed from name, so name parameter can be modified // if allocRoomForCounter is true, the following binding number in the same set will be allocated with name + '_counter' - inline std::pair GetVulkanResourceBinding(std::string &name, bool allocRoomForCounter = false, uint32_t preferredSet = 0) + inline VulkanResourceBinding GetVulkanResourceBinding(std::string &name, bool allocRoomForCounter = false, uint32_t preferredSet = 0) { // scan for the special marker const char *marker = "Xhlslcc_set_%d_bind_%dX"; @@ -343,11 +426,11 @@ public: name.erase(startLoc, endLoc - startLoc + 1); } // Add to map - VulkanResourceBinding newBind = std::make_pair(Set, Binding); + VulkanResourceBinding newBind = { Set, Binding }; m_VulkanResourceBindings.insert(std::make_pair(name, newBind)); if (allocRoomForCounter) { - VulkanResourceBinding counterBind = std::make_pair(Set, Binding + 1); + VulkanResourceBinding counterBind = { Set, Binding + 1 }; m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind)); } @@ -359,18 +442,100 @@ public: return itr->second; // Allocate a new one - VulkanResourceBinding newBind = std::make_pair(preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet]); + VulkanResourceBinding newBind = { preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet] }; m_NextAvailableVulkanResourceBinding[preferredSet]++; 
m_VulkanResourceBindings.insert(std::make_pair(name, newBind)); if (allocRoomForCounter) { - VulkanResourceBinding counterBind = std::make_pair(preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet]); + VulkanResourceBinding counterBind = { preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet] }; m_NextAvailableVulkanResourceBinding[preferredSet]++; m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind)); } return newBind; } + // GLSL Bind point handling logic + // Handles both 'old style' fill around fixed UAV and new style partitioned offsets with fixed UAV locations + + // HLSL has separate register spaces for UAV and structured buffers. GLSL has shared register space for all buffers. + // The aim here is to preserve the UAV buffer bindings as they are and use remaining binding points for structured buffers. + // In this step make m_StructuredBufferBindPoints contain increasingly ordered uints starting from zero. + // This is only used when we are doing old style binding setup + void SetupGLSLResourceBindingSlotsIndices() + { + for (uint32_t i = 0; i < MAX_RESOURCE_BINDINGS; i++) + { + m_StructuredBufferBindPoints[i] = i; + } + } + + void RemoveBindPointFromAvailableList(uint32_t bindPoint) + { + for (uint32_t i = 0; i < MAX_RESOURCE_BINDINGS - 1 && m_StructuredBufferBindPoints[i] <= bindPoint; i++) + { + if (m_StructuredBufferBindPoints[i] == bindPoint) // Remove uav binding point from the list by shifting the array remainder down; source and destination overlap, so memmove (not memcpy) is required + { + memmove(&m_StructuredBufferBindPoints[i], &m_StructuredBufferBindPoints[i + 1], (MAX_RESOURCE_BINDINGS - 1 - i) * sizeof(uint32_t)); + break; + } + } + } + + void ReserveNamedBindPoint(const std::string &name, uint32_t bindPoint, GLSLBufferType type) + { + m_GLSLResourceBindings.insert(std::make_pair(name, bindPoint)); + RemoveBindPointFromAvailableList(bindPoint); + } + + bool ShouldUseBufferSpecificBinding(GLSLBufferType bufferType) + { + return bufferType == BufferType_Constant || 
bufferType == BufferType_Texture || bufferType == BufferType_UBO; + } + + uint32_t GetGLSLBufferBindPointIndex(GLSLBufferType bufferType) + { + uint32_t binding = -1; + + if (ShouldUseBufferSpecificBinding(bufferType)) + { + binding = m_NextAvailableGLSLResourceBinding[bufferType]; + } + else + { + binding = m_StructuredBufferBindPoints[m_NextAvailableGLSLResourceBinding[BufferType_Generic]]; + } + + return binding; + } + + void UpdateResourceBindingIndex(GLSLBufferType bufferType) + { + if (ShouldUseBufferSpecificBinding(bufferType)) + { + m_NextAvailableGLSLResourceBinding[bufferType]++; + } + else + { + m_NextAvailableGLSLResourceBinding[BufferType_Generic]++; + } + } + + inline GLSLBufferBindPointInfo GetGLSLResourceBinding(const std::string &name, GLSLBufferType bufferType) + { + GLSLResouceBindings::iterator itr = m_GLSLResourceBindings.find(name); + if (itr != m_GLSLResourceBindings.end()) + { + return GLSLBufferBindPointInfo{ itr->second, true }; + } + + uint32_t binding = GetGLSLBufferBindPointIndex(bufferType); + UpdateResourceBindingIndex(bufferType); + + m_GLSLResourceBindings.insert(std::make_pair(name, binding)); + + return GLSLBufferBindPointInfo{ binding, false }; + } + //dcl_tessellator_partitioning and dcl_tessellator_output_primitive appear in hull shader for D3D, //but they appear on inputs inside domain shaders for GL. 
//Hull shader must be compiled before domain so the @@ -437,23 +602,32 @@ public: varyingLocationsMap[i].clear(); nextAvailableVaryingLocation[i] = 0; } - m_NextSpecID = kArraySizeConstantID + 1; - m_SpecConstantMap.clear(); m_SharedFunctionMembers.clear(); m_SharedDependencies.clear(); } - // Retrieve or allocate a layout slot for Vulkan specialization constant - inline uint32_t GetSpecializationConstantSlot(const std::string &name) + bool IsHullShaderInputAlreadyDeclared(const std::string& name) { - SpecializationConstantMap::iterator itr = m_SpecConstantMap.find(name); - if (itr != m_SpecConstantMap.end()) - return itr->second; + bool isKnown = false; - m_SpecConstantMap.insert(std::make_pair(std::string(name), m_NextSpecID)); + for (size_t idx = 0, end = m_hullShaderInputs.size(); idx < end; ++idx) + { + if (m_hullShaderInputs[idx] == name) + { + isKnown = true; + break; + } + } - return m_NextSpecID++; + return isKnown; } + + void RecordHullShaderInput(const std::string& name) + { + m_hullShaderInputs.push_back(name); + } + + std::vector m_hullShaderInputs; }; struct GLSLShader @@ -491,6 +665,21 @@ public: virtual void OnThreadGroupSize(unsigned int xSize, unsigned int ySize, unsigned int zSize) {} virtual void OnTessellationInfo(uint32_t tessPartitionMode, uint32_t tessOutputWindingOrder, uint32_t tessMaxFactor, uint32_t tessNumPatchesInThreadGroup) {} virtual void OnTessellationKernelInfo(uint32_t patchKernelBufferCount) {} + + // these are for now metal only (but can be trivially added for other backends if needed) + // they are useful mostly for diagnostics as interim values are actually hidden from user + virtual void OnVertexProgramOutput(const std::string& name, const std::string& semantic, int semanticIndex) {} + virtual void OnBuiltinOutput(SPECIAL_NAME name) {} + virtual void OnFragmentOutputDeclaration(int numComponents, int outputIndex) {} + + + enum AccessType + { + ReadAccess = 1 << 0, + WriteAccess = 1 << 1 + }; + + virtual void 
OnStorageImage(int bindIndex, unsigned int access) {} }; @@ -543,10 +732,10 @@ static const unsigned int HLSLCC_FLAG_GLES31_IMAGE_QUALIFIERS = 0x1000; static const unsigned int HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME = 0x2000; // If set, adds location qualifiers to intra-shader varyings. -static const unsigned int HLSLCC_FLAG_SEPARABLE_SHADER_OBJECTS = 0x4000; +static const unsigned int HLSLCC_FLAG_SEPARABLE_SHADER_OBJECTS = 0x4000; // NOTE: obsolete flag (behavior enabled by this flag began default in 83a16a1829cf) -// If set, wraps all uniform buffer declarations in a preprocessor macro #ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS -// so that if that macro is defined, all UBO declarations will become normal uniforms +// If set, wraps all uniform buffer declarations in a preprocessor macro #ifdef HLSLCC_ENABLE_UNIFORM_BUFFERS +// so that if that macro is undefined, all UBO declarations will become normal uniforms static const unsigned int HLSLCC_FLAG_WRAP_UBO = 0x8000; // If set, skips all members of the $Globals constant buffer struct that are not referenced in the shader code @@ -567,8 +756,7 @@ static const unsigned int HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR = 0x80000; // If set, avoid emit atomic counter (ARB_shader_atomic_counters) and use atomic functions provided by ARB_shader_storage_buffer_object instead. 
static const unsigned int HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS = 0x100000; -// If set, and generating Vulkan shaders, attempts to detect static branching and transforms them into specialization constants -static const unsigned int HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS = 0x200000; +// Unused 0x200000; // If set, this shader uses the GLSL extension EXT_shader_framebuffer_fetch static const unsigned int HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH = 0x400000; @@ -586,6 +774,18 @@ static const unsigned int HLSLCC_FLAG_METAL_TESSELLATION = 0x2000000; // Disable fastmath static const unsigned int HLSLCC_FLAG_DISABLE_FASTMATH = 0x4000000; +//If set, uniform explicit location qualifiers are enabled (even if the language version doesn't support that) +static const unsigned int HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS = 0x8000000; + +// If set, each line of the generated source will be preceded by a comment specifying which DirectX bytecode instruction it maps to +static const unsigned int HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS = 0x10000000; + +// If set, try to generate consistent varying locations based on the semantic indices in the hlsl source, i.e "TEXCOORD11" gets assigned to layout(location = 11) +static const unsigned int HLSLCC_FLAG_KEEP_VARYING_LOCATIONS = 0x20000000; + +// Code generation might vary for mobile targets, or using lower sampler precision than full by default +static const unsigned int HLSLCC_FLAG_MOBILE_TARGET = 0x40000000; + #ifdef __cplusplus extern "C" { #endif diff --git a/include/pstdint.h b/include/pstdint.h index f155b23..5a53278 100644 --- a/include/pstdint.h +++ b/include/pstdint.h @@ -678,7 +678,7 @@ typedef uint_least64_t uint_fast64_t; # elif defined(__i386__) || defined(_WIN32) || defined(WIN32) # define stdint_intptr_bits 32 # elif defined(__INTEL_COMPILER) -/* TODO -- what did Intel do about x86-64? 
*/ +#error Unknown compiler # endif # ifdef stdint_intptr_bits @@ -711,9 +711,7 @@ typedef uint_least64_t uint_fast64_t; typedef stdint_intptr_glue3 (uint, stdint_intptr_bits, _t) uintptr_t; typedef stdint_intptr_glue3 (int, stdint_intptr_bits, _t) intptr_t; # else -/* TODO -- This following is likely wrong for some platforms, and does - nothing for the definition of uintptr_t. */ -typedef ptrdiff_t intptr_t; +#error Unknown compiler # endif # define STDINT_H_UINTPTR_T_DEFINED #endif diff --git a/src/ControlFlowGraph.cpp b/src/ControlFlowGraph.cpp index 92f7918..bf45aae 100644 --- a/src/ControlFlowGraph.cpp +++ b/src/ControlFlowGraph.cpp @@ -9,7 +9,7 @@ using namespace HLSLcc::ControlFlow; using HLSLcc::ForEachOperand; -const BasicBlock &ControlFlowGraph::Build(const Instruction *firstInstruction) +const BasicBlock &ControlFlowGraph::Build(const Instruction* firstInstruction, const Instruction* endInstruction) { using std::for_each; @@ -17,7 +17,7 @@ const BasicBlock &ControlFlowGraph::Build(const Instruction *firstInstruction) m_BlockStorage.clear(); // Self-registering into m_BlockStorage so it goes out of the scope when ControlFlowGraph does - BasicBlock *root = new BasicBlock(Utils::GetNextNonLabelInstruction(firstInstruction), *this, NULL); + BasicBlock *root = new BasicBlock(Utils::GetNextNonLabelInstruction(firstInstruction), *this, NULL, endInstruction); // Build the reachable set for each block bool hadChanges; @@ -58,10 +58,11 @@ BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *ins // Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build(). 
// Auto-registers itself into ControlFlowGraph -BasicBlock::BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead) +BasicBlock::BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead, const Instruction* endInstruction) : m_Graph(graph) , m_First(psFirst) , m_Last(NULL) + , m_End(endInstruction) { m_UEVar.clear(); m_VarKill.clear(); @@ -94,7 +95,7 @@ BasicBlock::BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, cons void BasicBlock::Build() { const Instruction *inst = m_First; - while (1) + while (inst != m_End) { // Process sources first ForEachOperand(inst, inst + 1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND, @@ -158,7 +159,8 @@ void BasicBlock::Build() default: break; case OPCODE_RET: - blockDone = true; + // Continue processing, in the case of unreachable code we still need to translate it properly (case 1160309) + // blockDone = true; break; case OPCODE_RETC: // Basic block is done, start a next one. @@ -240,7 +242,7 @@ void BasicBlock::Build() m_Reachable = m_DEDef; // Tag the end of the basic block - m_Last = inst; + m_Last = std::max(m_First, std::min(inst, m_End - 1)); // printf("Basic Block %d -> %d\n", (int)m_First->id, (int)m_Last->id); } @@ -256,7 +258,7 @@ BasicBlock * BasicBlock::AddChildBasicBlock(const Instruction *psFirst) return b; } // Otherwise create one. 
Self-registering and self-connecting - return new BasicBlock(psFirst, m_Graph, m_First); + return new BasicBlock(psFirst, m_Graph, m_First, m_End); } bool BasicBlock::RebuildReachable() @@ -334,6 +336,7 @@ void BasicBlock::RVarUnion(ReachableVariables &a, const ReachableVariables &b) #if ENABLE_UNIT_TESTS #define UNITY_EXTERNAL_TOOL 1 +#include "Projects/PrecompiledHeaders/UnityPrefix.h" // Needed for defines such as ENABLE_CPP_EXCEPTIONS #include "Testing.h" // From Runtime/Testing UNIT_TEST_SUITE(HLSLcc) @@ -348,7 +351,7 @@ UNIT_TEST_SUITE(HLSLcc) }; ControlFlowGraph cfg; - const BasicBlock &root = cfg.Build(inst); + const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst)); CHECK_EQUAL(&inst[0], root.First()); CHECK_EQUAL(&inst[1], root.Last()); @@ -403,7 +406,7 @@ UNIT_TEST_SUITE(HLSLcc) }; ControlFlowGraph cfg; - const BasicBlock &root = cfg.Build(inst); + const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst)); CHECK_EQUAL(root.First(), &inst[0]); CHECK_EQUAL(root.Last(), &inst[2]); @@ -539,7 +542,7 @@ UNIT_TEST_SUITE(HLSLcc) }; ControlFlowGraph cfg; - const BasicBlock &root = cfg.Build(inst); + const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst)); CHECK_EQUAL(&inst[0], root.First()); CHECK_EQUAL(&inst[4], root.Last()); @@ -699,7 +702,7 @@ UNIT_TEST_SUITE(HLSLcc) }; ControlFlowGraph cfg; - const BasicBlock &root = cfg.Build(inst); + const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst)); CHECK_EQUAL(&inst[0], root.First()); CHECK_EQUAL(&inst[2], root.Last()); diff --git a/src/DataTypeAnalysis.cpp b/src/DataTypeAnalysis.cpp index 8fa463f..a53fc5a 100644 --- a/src/DataTypeAnalysis.cpp +++ b/src/DataTypeAnalysis.cpp @@ -430,13 +430,17 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, case OPCODE_LD: case OPCODE_LD_MS: - // TODO: Would need to know the sampler return type - MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + { + SHADER_VARIABLE_TYPE samplerReturnType = 
psInst->asOperands[2].aeDataType[0]; + MarkOperandAs(&psInst->asOperands[0], samplerReturnType, aeTempVecType); MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); break; + } case OPCODE_MOVC: MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType); + break; + case OPCODE_SWAPC: MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType); break; @@ -455,6 +459,7 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); break; } + break; case OPCODE_SAMPLE_INFO: // Sample_info uses the same RESINFO_RETURN_TYPE for storage. 0 = float, 1 = uint. @@ -594,20 +599,6 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, case OPCODE_DCL_RESOURCE_STRUCTURED: case OPCODE_SYNC: - // TODO - case OPCODE_DADD: - case OPCODE_DMAX: - case OPCODE_DMIN: - case OPCODE_DMUL: - case OPCODE_DEQ: - case OPCODE_DGE: - case OPCODE_DLT: - case OPCODE_DNE: - case OPCODE_DMOV: - case OPCODE_DMOVC: - case OPCODE_DTOF: - case OPCODE_FTOD: - case OPCODE_EVAL_SNAPPED: case OPCODE_EVAL_SAMPLE_INDEX: case OPCODE_EVAL_CENTROID: @@ -615,7 +606,22 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, case OPCODE_DCL_GS_INSTANCE_COUNT: case OPCODE_ABORT: - case OPCODE_DEBUG_BREAK:*/ + case OPCODE_DEBUG_BREAK: + + // Double not supported + case OPCODE_DADD: + case OPCODE_DMAX: + case OPCODE_DMIN: + case OPCODE_DMUL: + case OPCODE_DEQ: + case OPCODE_DGE: + case OPCODE_DLT: + case OPCODE_DNE: + case OPCODE_DMOV: + case OPCODE_DMOVC: + case OPCODE_DTOF: + case OPCODE_FTOD: + */ default: break; diff --git a/src/HLSLCrossCompilerContext.cpp b/src/HLSLCrossCompilerContext.cpp index f3be7ff..7117d81 100644 --- a/src/HLSLCrossCompilerContext.cpp +++ b/src/HLSLCrossCompilerContext.cpp @@ -7,6 +7,7 @@ #include "internal_includes/debug.h" #include "internal_includes/Translator.h" #include "internal_includes/ControlFlowGraph.h" +#include 
"internal_includes/languages.h" #include "include/hlslcc.h" #include @@ -49,8 +50,8 @@ void HLSLCrossCompilerContext::DoDataTypeAnalysis(ShaderPhase *psPhase) CalculateStandaloneDefinitions(duChains, psPhase->ui32TotalTemps); - // Only do sampler precision downgrade on pixel shaders. - if (psShader->eShaderType == PIXEL_SHADER) + // Only do sampler precision downgrade with pixel shaders on mobile targets / Switch + if (psShader->eShaderType == PIXEL_SHADER && (IsMobileTarget(this) || IsSwitch())) UpdateSamplerPrecisions(psShader->sInfo, duChains, psPhase->ui32TotalTemps); UDSplitTemps(&psPhase->ui32TotalTemps, duChains, udChains, psPhase->pui32SplitInfo); @@ -64,6 +65,55 @@ void HLSLCrossCompilerContext::DoDataTypeAnalysis(ShaderPhase *psPhase) psPhase->psTempDeclaration->value.ui32NumTemps = psPhase->ui32TotalTemps; } +void HLSLCrossCompilerContext::ReserveFramebufferFetchInputs() +{ + if (psShader->eShaderType != PIXEL_SHADER) + return; + + if (!psShader->extensions->EXT_shader_framebuffer_fetch) + return; + + if ((flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH) == 0) + return; + + if (!(psShader->eTargetLanguage >= LANG_ES_300 && psShader->eTargetLanguage <= LANG_ES_LAST)) + return; + + if (!psDependencies) + return; + + if (!HaveUniformBindingsAndLocations(psShader->eTargetLanguage, psShader->extensions, flags) && + ((flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS) == 0 || (flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != 0)) + return; + + // The Adreno GLSL compiler fails to compile shaders that use the same location for textures and inout attachments + // So here we figure out the maximum index of any inout render target and then make sure that we never use those for textures. 
+ int maxInOutRenderTargetIndex = -1; + for (const Declaration& decl : psShader->asPhases[0].psDecl) + { + if (decl.eOpcode != OPCODE_DCL_INPUT_PS) + continue; + + const Operand& operand = decl.asOperands[0]; + if (!operand.iPSInOut) + continue; + + const ShaderInfo::InOutSignature* signature = NULL; + if (!psShader->sInfo.GetInputSignatureFromRegister(operand.ui32RegisterNumber, operand.ui32CompMask, &signature, true)) + continue; + + const int index = signature->ui32SemanticIndex; + if (index > maxInOutRenderTargetIndex) + maxInOutRenderTargetIndex = index; + } + + if (maxInOutRenderTargetIndex >= 0) + { + if (maxInOutRenderTargetIndex >= psDependencies->m_NextAvailableGLSLResourceBinding[GLSLCrossDependencyData::BufferType_Texture]) + psDependencies->m_NextAvailableGLSLResourceBinding[GLSLCrossDependencyData::BufferType_Texture] = maxInOutRenderTargetIndex + 1; + } +} + void HLSLCrossCompilerContext::ClearDependencyData() { switch (psShader->eShaderType) diff --git a/src/HLSLcc.cpp b/src/HLSLcc.cpp index efcbe9f..4592d8c 100644 --- a/src/HLSLcc.cpp +++ b/src/HLSLcc.cpp @@ -1,6 +1,7 @@ #include "hlslcc.h" #include +#include #include "internal_includes/HLSLCrossCompilerContext.h" #include "internal_includes/toGLSL.h" #include "internal_includes/toMetal.h" @@ -27,6 +28,27 @@ #define GL_COMPUTE_SHADER 0x91B9 #endif +static bool CheckConstantBuffersNoDuplicateNames(const std::vector& buffers, HLSLccReflection& reflectionCallbacks) +{ + uint32_t count = buffers.size(); + for (uint32_t i = 0; i < count; ++i) + { + const ConstantBuffer& lhs = buffers[i]; + for (uint32_t j = i + 1; j < count; ++j) + { + const ConstantBuffer& rhs = buffers[j]; + if (lhs.name == rhs.name) + { + std::ostringstream oss; + oss << "Duplicate constant buffer declaration: " << lhs.name; + reflectionCallbacks.OnDiagnostics(oss.str(), 0, true); + return false; + } + } + } + + return true; +} HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader, unsigned int flags, @@ -49,6 +71,10 
@@ HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader, if (psShader.get()) { + Shader* shader = psShader.get(); + if (!CheckConstantBuffersNoDuplicateNames(shader->sInfo.psConstantBuffers, reflectionCallbacks)) + return 0; + HLSLCrossCompilerContext sContext(reflectionCallbacks); // Add shader precisions from the list @@ -59,7 +85,11 @@ HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader, flags &= ~HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS; } - sContext.psShader = psShader.get(); +#ifdef _DEBUG + flags |= HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS; +#endif + + sContext.psShader = shader; sContext.flags = flags; // If dependencies == NULL, we'll create a dummy object for it so that there's always something there. @@ -68,6 +98,7 @@ HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader, { depPtr.reset(new GLSLCrossDependencyData()); sContext.psDependencies = depPtr.get(); + sContext.psDependencies->SetupGLSLResourceBindingSlotsIndices(); } else sContext.psDependencies = dependencies; diff --git a/src/HLSLccToolkit.cpp b/src/HLSLccToolkit.cpp index 0d7b849..d081f2d 100644 --- a/src/HLSLccToolkit.cpp +++ b/src/HLSLccToolkit.cpp @@ -96,8 +96,7 @@ namespace HLSLcc } } - const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, - const int components) + const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, const int components) { static const char * const uintTypes[] = { " ", "uint", "uint2", "uint3", "uint4" }; static const char * const ushortTypes[] = { " ", "ushort", "ushort2", "ushort3", "ushort4" }; @@ -114,6 +113,7 @@ namespace HLSLcc case SVT_UINT: return uintTypes[components]; case SVT_UINT16: + case SVT_UINT8: // there is not uint8 in metal so treat it as ushort return ushortTypes[components]; case SVT_INT: return intTypes[components]; @@ -304,6 +304,47 @@ namespace HLSLcc } } + RESOURCE_RETURN_TYPE SVTTypeToResourceReturnType(SHADER_VARIABLE_TYPE type) + { + switch (type) + { + 
case SVT_INT: + case SVT_INT12: + case SVT_INT16: + return RETURN_TYPE_SINT; + case SVT_UINT: + case SVT_UINT16: + return RETURN_TYPE_UINT; + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + return RETURN_TYPE_FLOAT; + default: + return RETURN_TYPE_UNUSED; + } + } + + REFLECT_RESOURCE_PRECISION SVTTypeToPrecision(SHADER_VARIABLE_TYPE type) + { + switch (type) + { + case SVT_INT: + case SVT_UINT: + case SVT_FLOAT: + return REFLECT_RESOURCE_PRECISION_HIGHP; + case SVT_INT16: + case SVT_UINT16: + case SVT_FLOAT16: + return REFLECT_RESOURCE_PRECISION_MEDIUMP; + case SVT_INT12: + case SVT_FLOAT10: + case SVT_UINT8: + return REFLECT_RESOURCE_PRECISION_LOWP; + default: + return REFLECT_RESOURCE_PRECISION_UNKNOWN; + } + } + uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount) { return TO_AUTO_EXPAND_TO_VEC2 << (elemCount - 2); @@ -454,8 +495,8 @@ namespace HLSLcc if (context->psShader->eTargetLanguage == LANG_METAL) { - // avoid compiler error: cannot use as_type to cast from 'half' to 'unsigned int', types of different size - if ((src == SVT_FLOAT16 || src == SVT_FLOAT10) && (dest == SVT_UINT)) + // avoid compiler error: cannot use as_type to cast from 'half' to 'unsigned int' or 'int', types of different size + if ((src == SVT_FLOAT16 || src == SVT_FLOAT10) && (dest == SVT_UINT || dest == SVT_INT)) return true; } diff --git a/src/Operand.cpp b/src/Operand.cpp index 49beaac..9d9bf23 100644 --- a/src/Operand.cpp +++ b/src/Operand.cpp @@ -9,7 +9,7 @@ uint32_t Operand::GetAccessMask() const { int i; uint32_t accessMask = 0; - // TODO: Destination writemask can (AND DOES) affect access from sources, but do it conservatively for now. + // NOTE: Destination writemask can (AND DOES) affect access from sources, but we do it conservatively for now. 
switch (eSelMode) { default: @@ -245,6 +245,10 @@ int Operand::GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates /* = SVT_INT */) const { + // indexable temps (temp arrays) are always float + if (eType == OPERAND_TYPE_INDEXABLE_TEMP) + return SVT_FLOAT; + // The min precision qualifier overrides all of the stuff below switch (eMinPrecision) { @@ -468,15 +472,12 @@ SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, S { int foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); if (foundVar) - { return psVarType->Type; - } + + ASSERT(0); } else - { - // Todo: this isn't correct yet. - return SVT_FLOAT; - } + ASSERT(0); break; } case OPERAND_TYPE_IMMEDIATE32: @@ -520,7 +521,6 @@ SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, S return psContext->IsVulkan() ? SVT_UINT : SVT_FLOAT; } - case OPERAND_TYPE_INDEXABLE_TEMP: // Indexable temps are always floats default: { return SVT_FLOAT; @@ -572,7 +572,6 @@ int Operand::GetNumInputElements(const HLSLCrossCompilerContext *psContext) cons ASSERT(psSig != NULL); - // TODO: Are there ever any cases where the mask has 'holes'? 
return HLSLcc::GetNumberBitsSet(psSig->ui32Mask); } @@ -603,9 +602,9 @@ Operand* Operand::GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, co else if (psDynIndexOrigin->eOpcode == OPCODE_IMUL) { // check which one of the src operands is the original index - if ((asOps[2].eType == OPERAND_TYPE_TEMP || asOps[2].eType == OPERAND_TYPE_INPUT) && asOps[3].eType == OPERAND_TYPE_IMMEDIATE32) + if ((asOps[2].eType == OPERAND_TYPE_TEMP || asOps[2].eType == OPERAND_TYPE_INPUT || asOps[2].eType == OPERAND_TYPE_CONSTANT_BUFFER) && asOps[3].eType == OPERAND_TYPE_IMMEDIATE32) psOriginOp = &asOps[2]; - else if ((asOps[3].eType == OPERAND_TYPE_TEMP || asOps[3].eType == OPERAND_TYPE_INPUT) && asOps[2].eType == OPERAND_TYPE_IMMEDIATE32) + else if ((asOps[3].eType == OPERAND_TYPE_TEMP || asOps[3].eType == OPERAND_TYPE_INPUT || asOps[3].eType == OPERAND_TYPE_CONSTANT_BUFFER) && asOps[2].eType == OPERAND_TYPE_IMMEDIATE32) psOriginOp = &asOps[3]; } else if (psDynIndexOrigin->eOpcode == OPCODE_ISHL) diff --git a/src/Shader.cpp b/src/Shader.cpp index b59bd73..6f62ffe 100644 --- a/src/Shader.cpp +++ b/src/Shader.cpp @@ -65,51 +65,11 @@ void Shader::ConsolidateHullTempVars() } } -// HLSL has separate register spaces for UAV and structured buffers. GLSL has shared register space for all buffers. -// The aim here is to preserve the UAV buffer bindings as they are and use remaining binding points for structured buffers. -// In this step make aui32StructuredBufferBindingPoints contain increasingly ordered uints starting from zero. 
-void Shader::PrepareStructuredBufferBindingSlots() -{ - uint32_t i; - - for (i = 0; i < MAX_RESOURCE_BINDINGS; i++) - { - aui32StructuredBufferBindingPoints[i] = i; - } -} - -// Go through all declarations and remove UAV occupied binding points from the aui32StructuredBufferBindingPoints list -void Shader::ResolveStructuredBufferBindingSlots(ShaderPhase *psPhase) -{ - uint32_t p; - std::vector &bindingArray = aui32StructuredBufferBindingPoints; - - for (p = 0; p < psPhase->psDecl.size(); ++p) - { - if (psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW || - psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED) - { - uint32_t uav = psPhase->psDecl[p].asOperands[0].ui32RegisterNumber; // uav binding point - uint32_t i; - - // Find uav binding point from the list. Drop search if not found. - for (i = 0; i < MAX_RESOURCE_BINDINGS && bindingArray[i] <= uav; i++) - { - if (bindingArray[i] == uav) // Remove uav binding point from the list by copying array remainder here - { - memcpy(&bindingArray[i], &bindingArray[i + 1], (MAX_RESOURCE_BINDINGS - 1 - i) * sizeof(uint32_t)); - break; - } - } - } - } -} - // Image (RWTexture in HLSL) declaration op does not provide enough info about the format and accessing. // Go through all image declarations and instructions accessing it to see if it is readonly/writeonly. // While doing that we also get the number of components expected in the image format. // Also resolve access flags for other UAVs as well. No component count resolving for them. 
-void ShaderPhase::ResolveUAVProperties() +void ShaderPhase::ResolveUAVProperties(const ShaderInfo& sInfo) { Declaration *psFirstDeclaration = &psDecl[0]; @@ -163,8 +123,10 @@ void ShaderPhase::ResolveUAVProperties() case OPCODE_ATOMIC_XOR: case OPCODE_ATOMIC_IMIN: case OPCODE_ATOMIC_UMIN: + case OPCODE_ATOMIC_IMAX: + case OPCODE_ATOMIC_UMAX: opIndex = 0; - accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE; + accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC; numComponents = 1; break; @@ -179,7 +141,7 @@ void ShaderPhase::ResolveUAVProperties() case OPCODE_IMM_ATOMIC_EXCH: case OPCODE_IMM_ATOMIC_CMP_EXCH: opIndex = 1; - accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE; + accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC; numComponents = 1; break; @@ -211,7 +173,7 @@ void ShaderPhase::ResolveUAVProperties() case OPCODE_IMM_ATOMIC_ALLOC: case OPCODE_IMM_ATOMIC_CONSUME: opIndex = 1; - accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE; + accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC; numComponents = 0; break; @@ -235,6 +197,16 @@ void ShaderPhase::ResolveUAVProperties() psDecl->sUAV.ui32NumComponents = numComponents; } } + + if (psDecl->eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) + { + const ResourceBinding* psBinding = 0; + if (sInfo.GetResourceFromBindingPoint(RGROUP_UAV, uavReg, &psBinding)) + { + // component count is stored in flags as 2 bits, 00: vec1, 01: vec2, 10: vec3, 11: vec4 + psDecl->sUAV.ui32NumComponents = ((psBinding->ui32Flags >> 2) & 3) + 1; + } + } } } @@ -601,6 +573,18 @@ void Shader::AnalyzeIOOverlap() } } +void Shader::SetMaxSemanticIndex() +{ + for (std::vector::iterator it = sInfo.psInputSignatures.begin(); it != sInfo.psInputSignatures.end(); ++it) + maxSemanticIndex = std::max(maxSemanticIndex, it->ui32SemanticIndex); + + for (std::vector::iterator it = sInfo.psOutputSignatures.begin(); it != sInfo.psOutputSignatures.end(); ++it) + maxSemanticIndex = 
std::max(maxSemanticIndex, it->ui32SemanticIndex); + + for (std::vector::iterator it = sInfo.psPatchConstantSignatures.begin(); it != sInfo.psPatchConstantSignatures.end(); ++it) + maxSemanticIndex = std::max(maxSemanticIndex, it->ui32SemanticIndex); +} + // In DX bytecode, all const arrays are vec4's, and all arrays are stuffed to one large array. // Luckily, each chunk is always accessed with suboperand plus (in ui32RegisterNumber) // So do an analysis pass. Also trim the vec4's into smaller formats if the extra components are never read. @@ -753,7 +737,7 @@ HLSLcc::ControlFlow::ControlFlowGraph &ShaderPhase::GetCFG() { if (!m_CFGInitialized) { - m_CFG.Build(&psInst[0]); + m_CFG.Build(psInst.data(), psInst.data() + psInst.size()); m_CFGInitialized = true; } diff --git a/src/ShaderInfo.cpp b/src/ShaderInfo.cpp index a494125..554f202 100644 --- a/src/ShaderInfo.cpp +++ b/src/ShaderInfo.cpp @@ -4,6 +4,8 @@ #include "Operand.h" #include #include +#include + SHADER_VARIABLE_TYPE ShaderInfo::GetTextureDataType(uint32_t regNo) { @@ -385,24 +387,133 @@ ResourceGroup ShaderInfo::ResourceTypeToResourceGroup(ResourceType eType) return RGROUP_CBUFFER; } +static inline std::string GetTextureNameFromSamplerName(const std::string& samplerIn) +{ + ASSERT(samplerIn.compare(0, 7, "sampler") == 0); + + // please note that we do not have hard rules about how sampler names should be structured + // what's more they can even skip texture name (but that should be handled separately) + // how do we try to deduce the texture name: we remove known tokens, and take the leftmost (first) "word" + // note that we want to support c-style naming (with underscores for spaces) + // as it is pretty normal to have texture name starting with underscore + // we bind underscores "to the right" + + // note that we want sampler state to be case insensitive + // while checking for a match could be done with strncasecmp/_strnicmp + // windows is missing case-insensetive "find substring" (strcasestr), so 
we transform to lowercase instead + std::string sampler = samplerIn; + for (std::string::iterator i = sampler.begin(), in = sampler.end(); i != in; ++i) + *i = std::tolower(*i); + + struct Token { const char* str; int len; }; + #define TOKEN(s) { s, (int)strlen(s) } + Token token[] = { + TOKEN("compare"), + TOKEN("point"), TOKEN("trilinear"), TOKEN("linear"), + TOKEN("clamp"), TOKEN("clampu"), TOKEN("clampv"), TOKEN("clampw"), + TOKEN("repeat"), TOKEN("repeatu"), TOKEN("repeatv"), TOKEN("repeatw"), + TOKEN("mirror"), TOKEN("mirroru"), TOKEN("mirrorv"), TOKEN("mirrorw"), + TOKEN("mirroronce"), TOKEN("mirroronceu"), TOKEN("mirroroncev"), TOKEN("mirroroncew"), + }; + #undef TOKEN + + const char* s = sampler.c_str(); + for (int texNameStart = 7; s[texNameStart];) + { + // skip underscores and find the potential beginning of a token + int tokenStart = texNameStart, tokenEnd = -1; + while (s[tokenStart] == '_') + ++tokenStart; + + // check token list for matches + for (int i = 0, n = sizeof(token) / sizeof(token[0]); i < n && tokenEnd < 0; ++i) + if (strncmp(s + tokenStart, token[i].str, token[i].len) == 0) + tokenEnd = tokenStart + token[i].len; + + if (tokenEnd < 0) + { + // we have found texture name + + // find next token + int nextTokenStart = sampler.length(); + for (int i = 0, n = sizeof(token) / sizeof(token[0]); i < n; ++i) + { + // again: note that we want to be case insensitive + const int pos = sampler.find(token[i].str, tokenStart); + + if (pos != std::string::npos && pos < nextTokenStart) + nextTokenStart = pos; + } + + // check preceeding underscores, but only if we have found an actual token (not the end of the string) + if (nextTokenStart < sampler.length()) + { + while (nextTokenStart > tokenStart && s[nextTokenStart - 1] == '_') + --nextTokenStart; + } + + // note that we return the substring of the initial sampler name to preserve case + return samplerIn.substr(texNameStart, nextTokenStart - texNameStart); + } + else + { + // we have found known token 
+ texNameStart = tokenEnd; + } + } + + // if we ended up here, the texture name is missing + return ""; +} + +// note that we dont have the means right now to have unit tests in hlslcc, so we do poor man testing below +// AddSamplerPrecisions is called once for every program, so it is easy to uncomment and test +static inline void Test_GetTextureNameFromSamplerName() +{ + #define CHECK(s, t) ASSERT(GetTextureNameFromSamplerName(std::string(s)) == std::string(t)) + + CHECK("sampler_point_clamp", ""); + CHECK("sampler_point_clamp_Tex", "_Tex"); + CHECK("sampler_point_clamp_Tex__", "_Tex__"); + CHECK("sampler_______point_Tex", "_Tex"); + + CHECK("samplerPointClamp", ""); + CHECK("samplerPointClamp_Tex", "_Tex"); + CHECK("samplerPointClamp_Tex__", "_Tex__"); + + CHECK("samplerPointTexClamp", "Tex"); + CHECK("samplerPoint_TexClamp", "_Tex"); + CHECK("samplerPoint_Tex_Clamp", "_Tex"); + + #undef CHECK +} + void ShaderInfo::AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info) { if (info.empty()) return; +#if _DEBUG && 0 + Test_GetTextureNameFromSamplerName(); +#endif + for (size_t i = 0; i < psResourceBindings.size(); i++) { ResourceBinding *rb = &psResourceBindings[i]; - if (rb->eType != RTYPE_SAMPLER && rb->eType != RTYPE_TEXTURE) + if (rb->eType != RTYPE_SAMPLER && rb->eType != RTYPE_TEXTURE && rb->eType != RTYPE_UAV_RWTYPED) continue; - HLSLccSamplerPrecisionInfo::iterator j = info.find(rb->name); // Try finding exact match + // Try finding the exact match + HLSLccSamplerPrecisionInfo::iterator j = info.find(rb->name); - // If match not found, check if name has "sampler" prefix - // -> try finding a match without the prefix (DX11 style sampler case) + // If match not found, check if name has "sampler" prefix (DX11 style sampler case) + // then we try to recover texture name from sampler name if (j == info.end() && rb->name.compare(0, 7, "sampler") == 0) - j = info.find(rb->name.substr(7, rb->name.size() - 7)); + j = 
info.find(GetTextureNameFromSamplerName(rb->name)); + // note that if we didnt find the respective texture, we cannot say anything about sampler precision + // currently it will become "unknown" resulting in half format, even if we sample with it the texture explicitly marked as float + // TODO: should we somehow allow overriding it? if (j != info.end()) rb->ePrecision = j->second; } diff --git a/src/UseDefineChains.cpp b/src/UseDefineChains.cpp index 496854f..f6f7e89 100644 --- a/src/UseDefineChains.cpp +++ b/src/UseDefineChains.cpp @@ -582,8 +582,6 @@ static int AttemptSplitDefinitions(SplitDefinitions &defs, uint32_t *psNumTemps, // Add this define and all its siblings to the table and try again AddDefineToList(defs, *dl); return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable); - canSplit = 0; - break; } dl++; @@ -642,61 +640,6 @@ void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineCh } } -// Returns nonzero if all the operands have partial precision and at least one of them has been downgraded as part of shader downgrading process. -// Sampler ops, bitwise ops and comparisons are ignored. 
-static int CanDowngradeDefinitionPrecision(DefineUseChain::iterator du, OPERAND_MIN_PRECISION *pType) -{ - Instruction *psInst = du->psInst; - int hasFullPrecOperands = 0; - uint32_t i; - - if (du->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT) - return 0; - - switch (psInst->eOpcode) - { - case OPCODE_ADD: - case OPCODE_MUL: - case OPCODE_MOV: - case OPCODE_MAD: - case OPCODE_DIV: - case OPCODE_LOG: - case OPCODE_EXP: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_DP2: - case OPCODE_DP2ADD: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_RSQ: - case OPCODE_SQRT: - break; - default: - return 0; - } - - for (i = psInst->ui32FirstSrc; i < psInst->ui32NumOperands; i++) - { - Operand *op = &psInst->asOperands[i]; - if (op->eType == OPERAND_TYPE_IMMEDIATE32) - continue; // Immediate values are ignored - - if (op->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT) - { - hasFullPrecOperands = 1; - break; - } - } - - if (hasFullPrecOperands) - return 0; - - if (pType) - *pType = OPERAND_MIN_PRECISION_FLOAT_16; // Don't go lower than mediump - - return 1; -} - // Returns true if all the usages of this definitions are instructions that deal with floating point data static bool HasOnlyFloatUsages(DefineUseChain::iterator du) { @@ -747,8 +690,7 @@ void UpdateSamplerPrecisions(const ShaderInfo &info, DefineUseChains &psDUChains while (du != psDUChains[i].end()) { OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT; - if ((du->psInst->IsPartialPrecisionSamplerInstruction(info, &sType) - || CanDowngradeDefinitionPrecision(du, &sType)) + if (du->psInst->IsPartialPrecisionSamplerInstruction(info, &sType) && du->psInst->asOperands[0].eType == OPERAND_TYPE_TEMP && du->psInst->asOperands[0].eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT && du->isStandalone diff --git a/src/decode.cpp b/src/decode.cpp index a96b235..b0622b3 100644 --- a/src/decode.cpp +++ b/src/decode.cpp @@ -696,7 +696,6 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* 
pui32Token, // int iTupleSrc = 0, iTupleDest = 0; //const uint32_t ui32ConstCount = pui32Token[1] - 2; //const uint32_t ui32TupleCount = (ui32ConstCount / 4); - /*CUSTOMDATA_CLASS eClass =*/ DecodeCustomDataClass(pui32Token[0]); const uint32_t ui32NumVec4 = (ui32TokenLength - 2) / 4; @@ -841,6 +840,7 @@ const uint32_t* DecodeInstruction(const uint32_t* pui32Token, Instruction* psIns psInst->eOpcode = eOpcode; psInst->bSaturate = DecodeInstructionSaturate(*pui32Token); + psInst->ui32PreciseMask = DecodeInstructionPreciseMask(*pui32Token); psInst->bAddressOffset = 0; @@ -1386,7 +1386,6 @@ const void AllocateHullPhaseArrays(const uint32_t* pui32Tokens, while (1) //Keep going until we reach the first non-declaration token, or the end of the shader. { uint32_t ui32TokenLength = DecodeInstructionLength(*pui32CurrentToken); - /*const uint32_t bExtended =*/ DecodeIsOpcodeExtended(*pui32CurrentToken); const OPCODE_TYPE eOpcode = DecodeOpcodeType(*pui32CurrentToken); if (eOpcode == OPCODE_CUSTOMDATA) diff --git a/src/internal_includes/ControlFlowGraph.h b/src/internal_includes/ControlFlowGraph.h index 58a75f7..b9263cf 100644 --- a/src/internal_includes/ControlFlowGraph.h +++ b/src/internal_includes/ControlFlowGraph.h @@ -30,7 +30,7 @@ namespace ControlFlow typedef std::vector > BasicBlockStorage; - const BasicBlock &Build(const Instruction *firstInstruction); + const BasicBlock &Build(const Instruction* firstInstruction, const Instruction* endInstruction); // Only works for instructions that start the basic block const BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction) const; @@ -62,31 +62,33 @@ namespace ControlFlow struct Definition { - Definition(const Instruction *i = NULL, const Operand *o = NULL) + Definition(const Instruction* i = nullptr, const Operand* o = nullptr) : m_Instruction(i) , m_Operand(o) {} - Definition(const Definition &a) - : m_Instruction(a.m_Instruction) - , m_Operand(a.m_Operand) - {} + Definition(const Definition& a) = default; 
+ Definition(Definition&& a) = default; + ~Definition() = default; - bool operator==(const Definition &a) const + Definition& operator=(const Definition& a) = default; + Definition& operator=(Definition&& a) = default; + + bool operator==(const Definition& a) const { if (a.m_Instruction != m_Instruction) return false; return a.m_Operand == m_Operand; } - bool operator!=(const Definition &a) const + bool operator!=(const Definition& a) const { if (a.m_Instruction == m_Instruction) return false; return a.m_Operand != m_Operand; } - bool operator<(const Definition &a) const + bool operator<(const Definition& a) const { if (m_Instruction != a.m_Instruction) return m_Instruction < a.m_Instruction; @@ -118,7 +120,7 @@ namespace ControlFlow private: // Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build() - BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead); + BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead, const Instruction* psEnd); // Walk through the instructions and build UEVar and VarKill sets, create succeeding nodes if they don't exist already. void Build(); @@ -133,6 +135,7 @@ namespace ControlFlow const Instruction *m_First; // The first instruction in the basic block const Instruction *m_Last; // The last instruction in the basic block. Either OPCODE_RET or a branch/jump/loop instruction + const Instruction *m_End; // past-the-end pointer RegisterSet m_UEVar; // Upwards-exposed variables (temps that need definition from upstream and are used in this basic block) RegisterSet m_VarKill; // Set of variables that are defined in this block. 
diff --git a/src/internal_includes/Declaration.h b/src/internal_includes/Declaration.h index cb3e446..0586a22 100644 --- a/src/internal_includes/Declaration.h +++ b/src/internal_includes/Declaration.h @@ -15,14 +15,16 @@ typedef struct ICBVec4_TAG #define ACCESS_FLAG_READ 0x1 #define ACCESS_FLAG_WRITE 0x2 +#define ACCESS_FLAG_ATOMIC 0x4 struct Declaration { - Declaration() - : + Declaration() : eOpcode(OPCODE_INVALID), ui32NumOperands(0), - ui32BufferStride(0) + ui32BufferStride(0), + ui32TableLength(0), + ui32IsShadowTex(0) {} OPCODE_TYPE eOpcode; @@ -85,6 +87,12 @@ struct Declaration { uint32_t ui32Stride; uint32_t ui32Count; + + TGSM_TAG() : + ui32Stride(0), + ui32Count(0) + { + } } sTGSM; struct IndexableTemp_TAG @@ -92,6 +100,13 @@ struct Declaration uint32_t ui32RegIndex; uint32_t ui32RegCount; uint32_t ui32RegComponentSize; + + IndexableTemp_TAG() : + ui32RegIndex(0), + ui32RegCount(0), + ui32RegComponentSize(0) + { + } } sIdxTemp; uint32_t ui32TableLength; diff --git a/src/internal_includes/HLSLCrossCompilerContext.h b/src/internal_includes/HLSLCrossCompilerContext.h index 29be214..fa2af76 100644 --- a/src/internal_includes/HLSLCrossCompilerContext.h +++ b/src/internal_includes/HLSLCrossCompilerContext.h @@ -15,7 +15,21 @@ class HLSLccReflection; class HLSLCrossCompilerContext { public: - HLSLCrossCompilerContext(HLSLccReflection &refl) : m_Reflection(refl) {} + HLSLCrossCompilerContext(HLSLccReflection &refl) : + glsl(nullptr), + extensions(nullptr), + beforeMain(nullptr), + currentGLSLString(nullptr), + currentPhase(0), + indent(0), + flags(0), + psShader(nullptr), + psDependencies(nullptr), + inputPrefix(nullptr), + outputPrefix(nullptr), + psTranslator(nullptr), + m_Reflection(refl) + {} bstring glsl; bstring extensions; @@ -42,6 +56,7 @@ public: const char *outputPrefix; // Prefix for shader outputs void DoDataTypeAnalysis(ShaderPhase *psPhase); + void ReserveFramebufferFetchInputs(); void ClearDependencyData(); diff --git 
a/src/internal_includes/HLSLccToolkit.h b/src/internal_includes/HLSLccToolkit.h index e9ce772..6d7604e 100644 --- a/src/internal_includes/HLSLccToolkit.h +++ b/src/internal_includes/HLSLccToolkit.h @@ -41,6 +41,10 @@ namespace HLSLcc SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec); + RESOURCE_RETURN_TYPE SVTTypeToResourceReturnType(SHADER_VARIABLE_TYPE type); + + REFLECT_RESOURCE_PRECISION SVTTypeToPrecision(SHADER_VARIABLE_TYPE type); + uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount); bool IsOperationCommutative(int /* OPCODE_TYPE */ eOpCode); diff --git a/src/internal_includes/Instruction.h b/src/internal_includes/Instruction.h index f6a32e3..a2826aa 100644 --- a/src/internal_includes/Instruction.h +++ b/src/internal_includes/Instruction.h @@ -21,23 +21,51 @@ struct Instruction { - Instruction() - : eOpcode(OPCODE_NOP) - , eBooleanTestType(INSTRUCTION_TEST_ZERO) - , ui32NumOperands(0) - , ui32FirstSrc(0) - , m_Uses() - , m_SkipTranslation(false) - , m_InductorRegister(0) - , bSaturate(0) - , m_IsStaticBranch(false) - , m_StaticBranchCondition(NULL) + Instruction() : + eOpcode(OPCODE_NOP), + eBooleanTestType(INSTRUCTION_TEST_ZERO), + ui32NumOperands(0), + ui32FirstSrc(0), + m_Uses(), + m_SkipTranslation(false), + m_InductorRegister(0), + bSaturate(0), + ui32SyncFlags(0), + ui32PreciseMask(0), + ui32FuncIndexWithinInterface(0), + eResInfoReturnType(RESINFO_INSTRUCTION_RETURN_FLOAT), + bAddressOffset(0), + iUAddrOffset(0), + iVAddrOffset(0), + iWAddrOffset(0), + xType(RETURN_TYPE_UNUSED), + yType(RETURN_TYPE_UNUSED), + zType(RETURN_TYPE_UNUSED), + wType(RETURN_TYPE_UNUSED), + eResDim(RESOURCE_DIMENSION_UNKNOWN), + iCausedSplit(0), + id(0) { m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0; } // For creating unit tests only. 
Create an instruction with temps (unless reg is 0xffffffff in which case use OPERAND_TYPE_INPUT/OUTPUT) - Instruction(uint64_t _id, OPCODE_TYPE opcode, uint32_t reg1 = 0, uint32_t reg1Mask = 0, uint32_t reg2 = 0, uint32_t reg2Mask = 0, uint32_t reg3 = 0, uint32_t reg3Mask = 0, uint32_t reg4 = 0, uint32_t reg4Mask = 0) + Instruction(uint64_t _id, OPCODE_TYPE opcode, uint32_t reg1 = 0, uint32_t reg1Mask = 0, uint32_t reg2 = 0, uint32_t reg2Mask = 0, uint32_t reg3 = 0, uint32_t reg3Mask = 0, uint32_t reg4 = 0, uint32_t reg4Mask = 0) : + ui32SyncFlags(0), + bSaturate(0), + ui32PreciseMask(0), + ui32FuncIndexWithinInterface(0), + eResInfoReturnType(RESINFO_INSTRUCTION_RETURN_FLOAT), + bAddressOffset(0), + iUAddrOffset(0), + iVAddrOffset(0), + iWAddrOffset(0), + xType(RETURN_TYPE_UNUSED), + yType(RETURN_TYPE_UNUSED), + zType(RETURN_TYPE_UNUSED), + wType(RETURN_TYPE_UNUSED), + eResDim(RESOURCE_DIMENSION_UNKNOWN), + iCausedSplit(0) { id = _id; eOpcode = opcode; @@ -119,6 +147,7 @@ struct Instruction uint32_t ui32FirstSrc; Operand asOperands[6]; uint32_t bSaturate; + uint32_t ui32PreciseMask; uint32_t ui32FuncIndexWithinInterface; RESINFO_RETURN_TYPE eResInfoReturnType; @@ -130,23 +159,24 @@ struct Instruction RESOURCE_DIMENSION eResDim; int8_t iCausedSplit; // Nonzero if has caused a temp split. Later used by sampler datatype tweaking - bool m_IsStaticBranch; // If true, this instruction is a static branch - const Instruction *m_StaticBranchCondition; // If this is a static branch, this instruction points to the condition instruction. Can also be NULL if the operand itself is the condition - std::string m_StaticBranchName; // The name of the static branch variable, with the condition encoded in it. 
- struct Use { Use() : m_Inst(0), m_Op(0) {} - Use(const Use &a) : m_Inst(a.m_Inst), m_Op(a.m_Op) {} - Use(Instruction *inst, Operand *op) : m_Inst(inst), m_Op(op) {} + Use(const Use& a) = default; + Use(Use&& a) = default; + Use(Instruction* inst, Operand* op) : m_Inst(inst), m_Op(op) {} + ~Use() = default; - Instruction *m_Inst; // The instruction that references the result of this instruction - Operand *m_Op; // The operand within the instruction above. Note: can also be suboperand. + Use& operator=(const Use& a) = default; + Use& operator=(Use&& a) = default; + + Instruction* m_Inst; // The instruction that references the result of this instruction + Operand* m_Op; // The operand within the instruction above. Note: can also be suboperand. }; std::vector m_Uses; // Array of use sites for the result(s) of this instruction, if any of the results is a temp reg. - Instruction *m_LoopInductors[4]; // If OPCODE_LOOP and is suitable for transforming into for-loop, contains pointers to for initializer, end condition, breakc, and increment. + Instruction* m_LoopInductors[4]; // If OPCODE_LOOP and is suitable for transforming into for-loop, contains pointers to for initializer, end condition, breakc, and increment. 
bool m_SkipTranslation; // If true, don't emit this instruction (currently used by the for loop translation) uint32_t m_InductorRegister; // If non-zero, the inductor variable can be declared in the for statement, and this register number has been allocated for it diff --git a/src/internal_includes/Operand.h b/src/internal_includes/Operand.h index e1e91a2..59bae49 100644 --- a/src/internal_includes/Operand.h +++ b/src/internal_includes/Operand.h @@ -129,11 +129,16 @@ public: struct Define { Define() : m_Inst(0), m_Op(0) {} - Define(const Define &a) : m_Inst(a.m_Inst), m_Op(a.m_Op) {} - Define(Instruction *inst, Operand *op) : m_Inst(inst), m_Op(op) {} + Define(const Define& a) = default; + Define(Define&& a) = default; + Define(Instruction* inst, Operand* op) : m_Inst(inst), m_Op(op) {} + ~Define() = default; - Instruction *m_Inst; // Instruction that writes to the temp - Operand *m_Op; // The (destination) operand within that instruction. + Define& operator=(const Define& other) = default; + Define& operator=(Define&& other) = default; + + Instruction* m_Inst; // Instruction that writes to the temp + Operand* m_Op; // The (destination) operand within that instruction. }; std::vector m_Defines; // Array of instructions whose results this operand can use. 
(only if eType == OPERAND_TYPE_TEMP) diff --git a/src/internal_includes/Shader.h b/src/internal_includes/Shader.h index 5c2eee0..98a716b 100644 --- a/src/internal_includes/Shader.h +++ b/src/internal_includes/Shader.h @@ -15,9 +15,9 @@ struct ConstantArrayChunk { - ConstantArrayChunk() : m_Size(0), m_AccessMask(0) {} + ConstantArrayChunk() : m_Size(0), m_AccessMask(0), m_Rebase(0), m_ComponentCount(0) {} ConstantArrayChunk(uint32_t sz, uint32_t mask, Operand *firstUse) - : m_Size(sz), m_AccessMask(mask) + : m_Size(sz), m_AccessMask(mask), m_Rebase(0), m_ComponentCount(0) { m_UseSites.push_back(firstUse); } @@ -63,7 +63,7 @@ public: m_NextTexCoordTemp(0) {} - void ResolveUAVProperties(); + void ResolveUAVProperties(const ShaderInfo& sInfo); void UnvectorizeImmMoves(); // Transform MOV tX.xyz, (0, 1, 2) into MOV tX.x, 0; MOV tX.y, 1; MOV tX.z, 2 to make datatype analysis easier @@ -103,9 +103,6 @@ public: uint32_t m_NextFreeTempRegister; // A counter for creating new temporaries for for-loops. uint32_t m_NextTexCoordTemp; // A counter for creating tex coord temps for driver issue workarounds - // Instructions that are static branches (branches based on constant buffer values only) - std::vector m_StaticBranchInstructions; - private: bool m_CFGInitialized; HLSLcc::ControlFlow::ControlFlowGraph m_CFG; @@ -143,9 +140,8 @@ public: aiOpcodeUsed(NUM_OPCODES, 0), ui32CurrentVertexOutputStream(0), textureSamplers(), - aui32StructuredBufferBindingPoints(MAX_RESOURCE_BINDINGS, 0), - ui32CurrentStructuredBufferIndex(), - m_DummySamplerDeclared(false) + m_DummySamplerDeclared(false), + maxSemanticIndex(0) { } @@ -157,14 +153,6 @@ public: //Convert from per-phase temps to global temps. void ConsolidateHullTempVars(); - // Go through all declarations and remove UAV occupied binding points from the aui32StructuredBufferBindingPoints list - void ResolveStructuredBufferBindingSlots(ShaderPhase *psPhase); - - // HLSL has separate register spaces for UAV and structured buffers. 
GLSL has shared register space for all buffers. - // The aim here is to preserve the UAV buffer bindings as they are and use remaining binding points for structured buffers. - // In this step make aui32StructuredBufferBindingPoints contain increasingly ordered uints starting from zero. - void PrepareStructuredBufferBindingSlots(); - // Detect temp registers per data type that are actually used. void PruneTempRegisters(); @@ -172,6 +160,9 @@ public: // as in, 2x texcoord vec2's are packed together as vec4 but still accessed together. void AnalyzeIOOverlap(); + // Compute maxSemanticIndex based on the results of AnalyzeIOOverlap + void SetMaxSemanticIndex(); + // Change all references to vertex position to always be highp, having them be mediump causes problems on Metal and Vivante GPUs. void ForcePositionToHighp(); @@ -245,9 +236,6 @@ public: TextureSamplerPairs textureSamplers; - std::vector aui32StructuredBufferBindingPoints; - uint32_t ui32CurrentStructuredBufferIndex; - std::vector psIntTempSizes; // Array for whether this temp register needs declaration as int temp std::vector psInt16TempSizes; // min16ints std::vector psInt12TempSizes; // min12ints @@ -260,6 +248,7 @@ public: std::vector psBoolTempSizes; // ... and for bools bool m_DummySamplerDeclared; // If true, the shader doesn't declare any samplers but uses texelFetch and we have added a dummy sampler for Vulkan for that. 
+ uint32_t maxSemanticIndex; // Highest semantic index found by SignatureAnalysis private: void DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand); diff --git a/src/internal_includes/languages.h b/src/internal_includes/languages.h index b6f58bc..eefbeca 100644 --- a/src/internal_includes/languages.h +++ b/src/internal_includes/languages.h @@ -43,6 +43,14 @@ static int HaveOverloadedTextureFuncs(const GLLang eLang) return 1; } +static bool IsMobileTarget(const HLSLCrossCompilerContext *psContext) +{ + if ((psContext->flags & HLSLCC_FLAG_MOBILE_TARGET) != 0) + return true; + + return false; +} + //Only enable for ES. Vulkan and Switch. //Not present in 120, ignored in other desktop languages. Specifically enabled on Vulkan. static int HavePrecisionQualifiers(const HLSLCrossCompilerContext *psContext) @@ -308,4 +316,13 @@ static int HaveImageLoadStore(const GLLang eLang) return 0; } +static int HavePreciseQualifier(const GLLang eLang) +{ + if (eLang >= LANG_400) // TODO: Add for ES when we're adding 3.2 lang + { + return 1; + } + return 0; +} + #endif diff --git a/src/internal_includes/toGLSL.h b/src/internal_includes/toGLSL.h index c14337a..b4ae9a8 100644 --- a/src/internal_includes/toGLSL.h +++ b/src/internal_includes/toGLSL.h @@ -10,9 +10,16 @@ class ToGLSL : public Translator protected: GLLang language; bool m_NeedUnityInstancingArraySizeDecl; + bool m_NeedUnityPreTransformDecl; public: - explicit ToGLSL(HLSLCrossCompilerContext *ctx) : Translator(ctx), language(LANG_DEFAULT), m_NeedUnityInstancingArraySizeDecl(false), m_NumDeclaredWhileTrueLoops(0) {} + explicit ToGLSL(HLSLCrossCompilerContext* ctx) : + Translator(ctx), + language(LANG_DEFAULT), + m_NeedUnityInstancingArraySizeDecl(false), + m_NeedUnityPreTransformDecl(false), + m_NumDeclaredWhileTrueLoops(0) + {} // Sets the target language according to given input. 
if LANG_DEFAULT, does autodetect and returns the selected language GLLang SetLanguage(GLLang suggestedLanguage); @@ -22,27 +29,20 @@ public: virtual void SetIOPrefixes(); private: - // Vulkan-only: detect which branches only depend on uniforms and immediate values and can be turned into specialization constants. - void IdentifyStaticBranches(ShaderPhase *psPhase); - // May return false when we detect too complex stuff (matrices, arrays etc) - bool BuildStaticBranchNameForInstruction(Instruction &inst); - - void DeclareSpecializationConstants(ShaderPhase &phase); - - void TranslateOperand(bstring glsl, const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL); - void TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL); + void TranslateOperand(bstring glsl, const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL, bool forceNoConversion = false); + void TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL, bool forceNoConversion = false); void TranslateInstruction(Instruction* psInst, bool isEmbedded = false); - void TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase); - void TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase); + void TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion = false); + void TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion = false); void TranslateOperandIndex(const Operand* psOperand, int index); void 
TranslateOperandIndexMAD(const Operand* psOperand, int index, uint32_t multiply, uint32_t add); void AddOpAssignToDestWithMask(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask); + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int *pNeedsParenthesis, uint32_t ui32CompMask); void AddAssignToDest(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis); + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int* pNeedsParenthesis); void AddAssignPrologue(int numParenthesis, bool isEmbedded = false); @@ -53,10 +53,12 @@ private: void AddUserOutput(const Declaration* psDecl); void DeclareStructConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, const Operand* psOperand, bstring glsl); - void DeclareConstBufferShaderVariable(const char* varName, const struct ShaderVarType* psType, const struct ConstantBuffer* psCBuf, int unsizedArray, bool addUniformPrefix = false); + void DeclareConstBufferShaderVariable(const char* varName, const struct ShaderVarType* psType, const struct ConstantBuffer* psCBuf, int unsizedArray, bool addUniformPrefix, bool reportInReflection); void PreDeclareStructType(const std::string &name, const struct ShaderVarType* psType); void DeclareUBOConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, bstring glsl); + void ReportStruct(const std::string &name, const struct ShaderVarType* psType); + typedef enum { CMP_EQ, @@ -68,8 +70,8 @@ private: void AddComparison(Instruction* psInst, ComparisonType eType, uint32_t typeFlag); - void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, bool isEmbedded = false); - void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2); + void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, uint32_t precise, bool isEmbedded 
= false); + void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2, uint32_t precise); void CallBinaryOp(const char* name, Instruction* psInst, int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType, bool isEmbedded = false); void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, @@ -109,6 +111,8 @@ private: Instruction* psInst, bstring glsl); + void HandleSwitchTransformation(Instruction* psInst, bstring glsl); + // Add an extra function to the m_FunctionDefinitions list, unless it's already there. bool DeclareExtraFunction(const std::string &name, bstring body); void UseExtraFunctionDependency(const std::string &name); @@ -124,6 +128,117 @@ private: FunctionDefinitions m_FunctionDefinitions; std::vector m_FunctionDefinitionsOrder; + std::vector m_AdditionalDefinitions; + + std::vector m_DefinedStructs; + std::set m_DeclaredRenderTarget; int m_NumDeclaredWhileTrueLoops; + + struct SwitchConversion + { + /* + IF (CONDITION1) BREAK; STATEMENT1; IF (CONDITION2) BREAK; STATEMENT2;... transforms to + if (CONDITION1) {} ELSE { STATEMENT1; IF (CONDITION2) {} ELSE {STATEMENT2; ...} } + thus, we need to count the "BREAK" statements we encountered in each IF on the same level inside a SWITCH. 
+ */ + struct ConditionalInfo + { + int breakCount; // Count BREAK on the same level to emit enough closing braces afterwards + bool breakEncountered; // Just encountered a BREAK statment, potentially need to emit "ELSE" + bool endifEncountered; // We need to check for "ENDIF ELSE" sequence, and not emit "else" if we see it + + ConditionalInfo() : + ConditionalInfo(0, false) + {} + + explicit ConditionalInfo(int initialBreakCount) : + ConditionalInfo(initialBreakCount, false) + {} + + ConditionalInfo(int initialBreakCount, bool withEndif) : + ConditionalInfo(initialBreakCount, withEndif, false) + {} + + ConditionalInfo(int initialBreakCount, bool withEndif, bool withBreak) : + breakCount(initialBreakCount), + endifEncountered(withEndif), + breakEncountered(withBreak) + {} + }; + + bstring switchOperand; + // We defer emitting if (condition) for each CASE statement to concatenate possible CASE A: CASE B:... into one if (). + std::vector currentCaseOperands; + std::vector conditionalsInfo; + int isInLoop; // We don't count "BREAK" (end emit them) if we're in a loop. 
+ bool isFirstCase; + + SwitchConversion() : + switchOperand(bfromcstr("")), + isInLoop(0), + isFirstCase(true) + {} + + SwitchConversion(const SwitchConversion& other) : + switchOperand(bstrcpy(other.switchOperand)), + conditionalsInfo(other.conditionalsInfo), + isInLoop(other.isInLoop), + isFirstCase(other.isFirstCase) + { + currentCaseOperands.reserve(other.currentCaseOperands.size()); + for (size_t i = 0; i < other.currentCaseOperands.size(); ++i) + currentCaseOperands.push_back(bstrcpy(other.currentCaseOperands[i])); + } + + SwitchConversion(SwitchConversion&& other) : + switchOperand(other.switchOperand), + currentCaseOperands(std::move(other.currentCaseOperands)), + conditionalsInfo(std::move(other.conditionalsInfo)), + isInLoop(other.isInLoop), + isFirstCase(other.isFirstCase) + { + other.switchOperand = nullptr; + } + + ~SwitchConversion() + { + bdestroy(switchOperand); + for (size_t i = 0; i < currentCaseOperands.size(); ++i) + bdestroy(currentCaseOperands[i]); + } + + SwitchConversion& operator=(const SwitchConversion& other) + { + if (this == &other) + return *this; + + switchOperand = bstrcpy(other.switchOperand); + conditionalsInfo = other.conditionalsInfo; + isInLoop = other.isInLoop; + isFirstCase = other.isFirstCase; + currentCaseOperands.reserve(other.currentCaseOperands.size()); + for (size_t i = 0; i < other.currentCaseOperands.size(); ++i) + currentCaseOperands.push_back(bstrcpy(other.currentCaseOperands[i])); + + return *this; + } + + SwitchConversion& operator=(SwitchConversion&& other) + { + if (this == &other) + return *this; + + switchOperand = other.switchOperand; + conditionalsInfo = std::move(other.conditionalsInfo); + isInLoop = other.isInLoop; + isFirstCase = other.isFirstCase; + currentCaseOperands = std::move(other.currentCaseOperands); + + other.switchOperand = nullptr; + + return *this; + } + }; + std::vector m_SwitchStack; }; diff --git a/src/internal_includes/toMetal.h b/src/internal_includes/toMetal.h index d3beed6..08d8eb1 
100644 --- a/src/internal_includes/toMetal.h +++ b/src/internal_includes/toMetal.h @@ -20,8 +20,6 @@ struct TextureSamplerDesc class ToMetal : public Translator { -protected: - GLLang language; public: explicit ToMetal(HLSLCrossCompilerContext *ctx) : Translator(ctx) @@ -78,9 +76,9 @@ private: // ToMetalInstruction.cpp void AddOpAssignToDestWithMask(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask); + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int& numParenthesis, uint32_t ui32CompMask); void AddAssignToDest(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis); + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int& numParenthesis); void AddAssignPrologue(int numParenthesis); typedef enum @@ -96,8 +94,8 @@ private: bool CanForceToHalfOperand(const Operand *psOperand); - void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc); - void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2); + void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, uint32_t precise); + void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2, uint32_t precise); void CallBinaryOp(const char* name, Instruction* psInst, int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType); void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, @@ -152,6 +150,14 @@ private: BindingSlotAllocator m_TextureSlots, m_SamplerSlots; BindingSlotAllocator m_BufferSlots; + struct BufferReflection + { + uint32_t bind; + bool isUAV; + bool hasCounter; + }; + std::map m_BufferReflections; + std::vector m_Samplers; std::vector m_Textures; diff --git a/src/internal_includes/tokens.h b/src/internal_includes/tokens.h index 912234b..671ccbd 100644 --- a/src/internal_includes/tokens.h +++ 
b/src/internal_includes/tokens.h @@ -714,6 +714,11 @@ static uint32_t DecodeInstructionSaturate(uint32_t ui32Token) return (ui32Token & 0x00002000) ? 1 : 0; } +static uint32_t DecodeInstructionPreciseMask(uint32_t ui32Token) // "precise" keyword +{ + return (uint32_t)((ui32Token & 0x00780000) >> 19); +} + typedef enum OPERAND_MIN_PRECISION { OPERAND_MIN_PRECISION_DEFAULT = 0, // Default precision diff --git a/src/reflect.cpp b/src/reflect.cpp index dca2004..303dbfd 100644 --- a/src/reflect.cpp +++ b/src/reflect.cpp @@ -430,10 +430,19 @@ static void ReadResources(const uint32_t* pui32Tokens,//in pui32ConstantBuffers = ReadConstantBuffer(psShaderInfo, pui32FirstToken, pui32ConstantBuffers, psConstantBuffers + i); } - //Map resource bindings to constant buffers if (psShaderInfo->psConstantBuffers.size()) { + /* HLSL allows the following: + cbuffer A + {...} + cbuffer A + {...} + And both will be present in the assembly if used + + So we need to track which ones we matched already and throw an error if two buffers have the same name + */ + std::vector alreadyBound(ui32NumConstantBuffers, 0); for (i = 0; i < ui32NumResourceBindings; ++i) { ResourceGroup eRGroup; @@ -444,9 +453,11 @@ static void ReadResources(const uint32_t* pui32Tokens,//in //Find the constant buffer whose name matches the resource at the given resource binding point for (cbufIndex = 0; cbufIndex < psShaderInfo->psConstantBuffers.size(); cbufIndex++) { - if (psConstantBuffers[cbufIndex].name == psResBindings[i].name) + if (psConstantBuffers[cbufIndex].name == psResBindings[i].name && alreadyBound[cbufIndex] == 0) { psShaderInfo->aui32ResourceMap[eRGroup][psResBindings[i].ui32BindPoint] = cbufIndex; + alreadyBound[cbufIndex] = 1; + break; } } } diff --git a/src/toGLSL.cpp b/src/toGLSL.cpp index 41707e3..c6c4e14 100644 --- a/src/toGLSL.cpp +++ b/src/toGLSL.cpp @@ -497,6 +497,11 @@ static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext) inputName = oss.str(); } + if 
(psContext->psDependencies->IsHullShaderInputAlreadyDeclared(inputName)) + continue; + + psContext->psDependencies->RecordHullShaderInput(inputName); + std::string outputName; { std::ostringstream oss; @@ -513,8 +518,9 @@ static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext) prec = "highp "; } - int inLoc = psContext->psDependencies->GetVaryingLocation(inputName, HULL_SHADER, true); - int outLoc = psContext->psDependencies->GetVaryingLocation(outputName, HULL_SHADER, false); + bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); + int inLoc = psContext->psDependencies->GetVaryingLocation(inputName, HULL_SHADER, true, keepLocation, psContext->psShader->maxSemanticIndex); + int outLoc = psContext->psDependencies->GetVaryingLocation(outputName, HULL_SHADER, false, keepLocation, psContext->psShader->maxSemanticIndex); psContext->AddIndentation(); if (ui32NumComponents > 1) @@ -562,6 +568,29 @@ GLLang ToGLSL::SetLanguage(GLLang suggestedLanguage) return language; } +// Go through all declarations and remove reserve UAV occupied binding points +void ResolveStructuredBufferBindingSlots(ShaderPhase *psPhase, HLSLCrossCompilerContext *psContext, GLSLCrossDependencyData *glslDependencyData) +{ + for (uint32_t p = 0; p < psPhase->psDecl.size(); ++p) + { + if (psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW || + psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED) + { + uint32_t uav = psPhase->psDecl[p].asOperands[0].ui32RegisterNumber; // uav binding point + + bstring BufNamebstr = bfromcstr(""); + ResourceName(BufNamebstr, psContext, RGROUP_UAV, psPhase->psDecl[p].asOperands[0].ui32RegisterNumber, 0); + + char *btmp = bstr2cstr(BufNamebstr, '\0'); + std::string BufName = btmp; + bcstrfree(btmp); + bdestroy(BufNamebstr); + + glslDependencyData->ReserveNamedBindPoint(BufName, uav, GLSLCrossDependencyData::BufferType_ReadWrite); + } + } +} + bool ToGLSL::Translate() { bstring glsl; @@ 
-578,6 +607,8 @@ bool ToGLSL::Translate() psShader->ExpandSWAPCs(); psShader->ForcePositionToHighp(); psShader->AnalyzeIOOverlap(); + if ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0) + psShader->SetMaxSemanticIndex(); psShader->FindUnusedGlobals(psContext->flags); psContext->indent = 0; @@ -633,18 +664,44 @@ bool ToGLSL::Translate() bcatcstr(glsl, "#endif\n"); } - - psShader->PrepareStructuredBufferBindingSlots(); + if (psContext->psShader->eTargetLanguage != LANG_ES_100) + { + bool hasConstantBuffers = psContext->psShader->sInfo.psConstantBuffers.size() > 0; + if (hasConstantBuffers) + { + // This value will be replaced at runtime with 0 if we need to disable UBO. + bcatcstr(glsl, "#define HLSLCC_ENABLE_UNIFORM_BUFFERS 1\n"); + bcatcstr(glsl, "#if HLSLCC_ENABLE_UNIFORM_BUFFERS\n#define UNITY_UNIFORM\n#else\n#define UNITY_UNIFORM uniform\n#endif\n"); + } + bool hasTextures = false; + for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i) + { + if (psShader->asPhases[0].psDecl[i].eOpcode == OPCODE_DCL_RESOURCE) + { + hasTextures = true; + break; + } + } + if (hasTextures || hasConstantBuffers) + { + // This value will be replaced at runtime with 0 if we need to disable explicit uniform locations. 
+ bcatcstr(glsl, "#define UNITY_SUPPORTS_UNIFORM_LOCATION 1\n"); + bcatcstr(glsl, "#if UNITY_SUPPORTS_UNIFORM_LOCATION\n#define UNITY_LOCATION(x) layout(location = x)\n#define UNITY_BINDING(x) layout(binding = x, std140)\n#else\n#define UNITY_LOCATION(x)\n#define UNITY_BINDING(x) layout(std140)\n#endif\n"); + } + } for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++) { ShaderPhase &phase = psShader->asPhases[ui32Phase]; phase.UnvectorizeImmMoves(); psContext->DoDataTypeAnalysis(&phase); - phase.ResolveUAVProperties(); - psShader->ResolveStructuredBufferBindingSlots(&phase); + phase.ResolveUAVProperties(psShader->sInfo); + ResolveStructuredBufferBindingSlots(&phase, psContext, psContext->psDependencies); if (!psContext->IsVulkan() && !psContext->IsSwitch()) + { phase.PruneConstArrays(); + psContext->ReserveFramebufferFetchInputs(); + } } psShader->PruneTempRegisters(); @@ -654,11 +711,6 @@ bool ToGLSL::Translate() // Loop transform can only be done after the temps have been pruned ShaderPhase &phase = psShader->asPhases[ui32Phase]; HLSLcc::DoLoopTransform(psContext, phase); - - if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0) - { - IdentifyStaticBranches(&phase); - } } //Special case. Can have multiple phases. 
@@ -694,20 +746,16 @@ bool ToGLSL::Translate() ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; psContext->currentPhase = ui32Phase; -#ifdef _DEBUG - bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase)); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase)); + } + for (i = 0; i < psPhase->psDecl.size(); ++i) { TranslateDeclaration(&psPhase->psDecl[i]); } - if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0) - { - DeclareSpecializationConstants(*psPhase); - } - - bformata(glsl, "void %s%d(int phaseInstanceID)\n{\n", GetPhaseFuncName(psPhase->ePhase), ui32Phase); psContext->indent++; @@ -758,15 +806,19 @@ bool ToGLSL::Translate() if (psPhase->earlyMain->slen > 1) { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Start Early Main ---\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Start Early Main ---\n"); + } + bconcat(glsl, psPhase->earlyMain); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End Early Main ---\n"); -#endif + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End Early Main ---\n"); + } } for (i = 0; i < psPhase->ui32InstanceCount; i++) @@ -777,15 +829,19 @@ bool ToGLSL::Translate() if (psPhase->hasPostShaderCode) { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Post shader code ---\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); + } + bconcat(glsl, psPhase->postShaderCode); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End post shader code ---\n"); -#endif + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + 
psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); + } } @@ -834,6 +890,13 @@ bool ToGLSL::Translate() bcatcstr(extensions, "#ifndef " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "\n\t#define " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO " 2\n#endif\n"); } } + if (m_NeedUnityPreTransformDecl) + { + if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) + { + bformata(extensions, "layout(constant_id = %d) const int %s = 0;\n", kPreTransformConstantID, UNITY_PRETRANSFORM_CONSTANT_NAME); + } + } bconcat(extensions, glsl); bdestroy(glsl); @@ -912,11 +975,6 @@ bool ToGLSL::Translate() TranslateDeclaration(&psShader->asPhases[0].psDecl[i]); } - if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0) - { - DeclareSpecializationConstants(psShader->asPhases[0]); - } - // Search and replace string, for injecting generated functions that need to be after default precision declarations bconcat(glsl, generatedFunctionsKeyword); @@ -932,15 +990,19 @@ bool ToGLSL::Translate() if (psContext->psShader->asPhases[0].earlyMain->slen > 1) { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Start Early Main ---\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Start Early Main ---\n"); + } + bconcat(glsl, psContext->psShader->asPhases[0].earlyMain); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End Early Main ---\n"); -#endif + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End Early Main ---\n"); + } } for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i) @@ -952,18 +1014,26 @@ bool ToGLSL::Translate() bcatcstr(glsl, "}\n"); - // Print out extra functions we generated in generation order to satisfy dependencies + // Print out extra definitions and functions we generated in generation order to satisfy dependencies { - bstring 
generatedFunctions = bfromcstr(""); + bstring generatedFunctionsAndDefinitions = bfromcstr(""); + + for (size_t i = 0; i < m_AdditionalDefinitions.size(); ++i) + { + bcatcstr(generatedFunctionsAndDefinitions, m_AdditionalDefinitions[i].c_str()); + bcatcstr(generatedFunctionsAndDefinitions, "\n"); + } + for (std::vector::const_iterator funcNameIter = m_FunctionDefinitionsOrder.begin(); funcNameIter != m_FunctionDefinitionsOrder.end(); ++funcNameIter) { const FunctionDefinitions::const_iterator definition = m_FunctionDefinitions.find(*funcNameIter); ASSERT(definition != m_FunctionDefinitions.end()); - bcatcstr(generatedFunctions, definition->second.c_str()); - bcatcstr(generatedFunctions, "\n"); + bcatcstr(generatedFunctionsAndDefinitions, definition->second.c_str()); + bcatcstr(generatedFunctionsAndDefinitions, "\n"); } - bfindreplace(glsl, generatedFunctionsKeyword, generatedFunctions, 0); - bdestroy(generatedFunctions); + bfindreplace(glsl, generatedFunctionsKeyword, generatedFunctionsAndDefinitions, 0); + bdestroy(generatedFunctionsAndDefinitions); + bdestroy(generatedFunctionsKeyword); } // Concat extensions and glsl for the final shader code. @@ -978,6 +1048,13 @@ bool ToGLSL::Translate() bcatcstr(extensions, "#ifndef " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "\n\t#define " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO " 2\n#endif\n"); } } + if (m_NeedUnityPreTransformDecl) + { + if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) + { + bformata(extensions, "layout(constant_id = %d) const int %s = 0;\n", kPreTransformConstantID, UNITY_PRETRANSFORM_CONSTANT_NAME); + } + } bconcat(extensions, glsl); bdestroy(glsl); @@ -1111,233 +1188,3 @@ void ToGLSL::UseExtraFunctionDependency(const std::string &name) bdestroy(code); } - -void ToGLSL::DeclareSpecializationConstants(ShaderPhase &phase) -{ - bstring glsl = psContext->glsl; - // There may be several uses for the same branch condition, so we'll need to keep track of what we've already declared. 
- std::set alreadyDeclared; - for (std::vector::iterator itr = phase.m_StaticBranchInstructions.begin(); itr != phase.m_StaticBranchInstructions.end(); itr++) - { - Instruction &i = **itr; - uint32_t slot = psContext->psDependencies->GetSpecializationConstantSlot(i.m_StaticBranchName); - if (alreadyDeclared.insert(slot).second) // Only declare if the insertion actually succeeded - bformata(glsl, "layout(constant_id = %d) const bool %s = false;\n", slot, i.m_StaticBranchName.c_str()); - } -} - -std::string to64(uint32_t in) -{ - const char to64[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - - char c_[2]; - c_[0] = to64[in]; - c_[1] = 0; - char c = c_[0]; - if (c == 'X') - return "XX"; - if (c == '+') - return "XA"; - if (c == '/') - return "XB"; - return std::string(c_); -} - -// Slightly custom base64, espace non-identifier chars with 'X' -static void Base64Encode(const std::string &in, std::string& result) -{ - size_t len = in.length(); - size_t outputLen = (len + 2) / 3 * 4; - unsigned char *bytes = (unsigned char *)&in[0]; - - result.clear(); - result.reserve(outputLen); - - int i = 0; - unsigned char b1, b2, b3; - for (int chunk = 0; len > 0; len -= 3, chunk++) - { - b1 = bytes[i++]; - b2 = len > 1 ? 
bytes[i++] : '\0'; - result += to64(b1 >> 2); - result += to64(((b1 & 3) << 4) | ((b2 & 0xf0) >> 4)); - if (len > 2) - { - b3 = bytes[i++]; - result += to64(((b2 & 0xF) << 2) | ((b3 & 0xC0) >> 6)); - result += to64(b3 & 0x3F); - } - else if (len == 2) - { - result += to64((b2 & 0xF) << 2); - result += "XC"; - break; - } - else /* len == 1 */ - { - result += "XC"; - break; - } - } -} - -bool ToGLSL::BuildStaticBranchNameForInstruction(Instruction &inst) -{ - std::ostringstream oss; - if (!inst.m_StaticBranchCondition) - { - // Simple case, just get the value, check if nonzero - bstring varname = bfromcstr(""); - SHADER_VARIABLE_TYPE argType = inst.asOperands[0].GetDataType(psContext); - uint32_t flag = TO_FLAG_NONE; - switch (argType) - { - case SVT_BOOL: - flag = TO_FLAG_BOOL; - break; - case SVT_INT: - case SVT_INT12: - case SVT_INT16: - flag = TO_FLAG_INTEGER; - break; - case SVT_UINT: - case SVT_UINT16: - case SVT_UINT8: - flag = TO_FLAG_UNSIGNED_INTEGER; - break; - default: - break; - } - TranslateOperand(varname, &inst.asOperands[0], flag); - char *str = bstr2cstr(varname, '\0'); - oss << str; - bcstrfree(str); - bdestroy(varname); - oss << "!=0"; - std::string res = oss.str(); - // Sanity checks: no arrays, no matrices - if (res.find('[') != std::string::npos) - return false; - if (res.find("hlslcc_mtx") != std::string::npos) - return false; - Base64Encode(res, inst.m_StaticBranchName); - } - else - { - // Indirect, just store the whole previous instruction and then the condition - bstring res = bfromcstr(""); - - bstring *oldglsl = psContext->currentGLSLString; - psContext->currentGLSLString = &res; - TranslateInstruction((Instruction *)inst.m_StaticBranchCondition, true); - psContext->currentGLSLString = oldglsl; - - SHADER_VARIABLE_TYPE argType = inst.asOperands[0].GetDataType(psContext); - uint32_t flag = TO_FLAG_NONE; - switch (argType) - { - case SVT_BOOL: - flag = TO_FLAG_BOOL; - break; - case SVT_INT: - case SVT_INT12: - case SVT_INT16: - flag = 
TO_FLAG_INTEGER; - break; - case SVT_UINT: - case SVT_UINT16: - case SVT_UINT8: - flag = TO_FLAG_UNSIGNED_INTEGER; - break; - default: - break; - } - - if (argType == SVT_BOOL) - { - if (inst.eBooleanTestType == INSTRUCTION_TEST_ZERO) - bcatcstr(res, "!"); - } - - TranslateOperand(res, &inst.asOperands[0], flag); - char *str = bstr2cstr(res, '\0'); - oss << str; - bcstrfree(str); - bdestroy(res); - if (argType != SVT_BOOL) - oss << "!=0"; - - std::string ress = oss.str(); - // Sanity checks: no arrays, no matrices - if (ress.find('[') != std::string::npos) - return false; - if (ress.find("hlslcc_mtx") != std::string::npos) - return false; - Base64Encode(ress, inst.m_StaticBranchName); - } - return true; -} - -void ToGLSL::IdentifyStaticBranches(ShaderPhase *psPhase) -{ - for (std::vector::iterator itr = psPhase->psInst.begin(); itr != psPhase->psInst.end(); itr++) - { - Instruction &i = *itr; - - if (!i.IsConditionalBranchInstruction()) - continue; - - // Simple case, direct conditional branch - if (i.asOperands[0].eType == OPERAND_TYPE_CONSTANT_BUFFER) - { - i.m_StaticBranchCondition = NULL; - if (BuildStaticBranchNameForInstruction(i)) - { - psPhase->m_StaticBranchInstructions.push_back(&i); - i.m_IsStaticBranch = true; - } - } - // Indirect, comparison via another instruction - if (i.asOperands[0].eType == OPERAND_TYPE_TEMP) - { - // Check that the temp only has one visible definition - if (i.asOperands[0].m_Defines.size() == 1) - { - // ...and that it only uses constant buffers and immediates - - Instruction &def = *i.asOperands[0].m_Defines[0].m_Inst; - bool isStatic = true; - for (uint32_t k = def.ui32FirstSrc; k < def.ui32NumOperands; k++) - { - Operand &o = def.asOperands[k]; - if (!(o.eType == OPERAND_TYPE_CONSTANT_BUFFER || o.eType == OPERAND_TYPE_IMMEDIATE32)) - { - isStatic = false; - break; - } - // Also check that the constant buffer access is "simple" - if (o.eType == OPERAND_TYPE_CONSTANT_BUFFER) - { - if (o.m_SubOperands[0].get() || 
o.m_SubOperands[1].get()) - { - isStatic = false; - break; - } - } - } - if (isStatic) - { - i.m_StaticBranchCondition = &def; - if (BuildStaticBranchNameForInstruction(i)) - { - psPhase->m_StaticBranchInstructions.push_back(&i); - i.m_IsStaticBranch = true; - } - else - i.m_StaticBranchCondition = NULL; - } - } - } - } -} diff --git a/src/toGLSLDeclaration.cpp b/src/toGLSLDeclaration.cpp index ca521f4..efafefa 100644 --- a/src/toGLSLDeclaration.cpp +++ b/src/toGLSLDeclaration.cpp @@ -26,10 +26,61 @@ using namespace HLSLcc; #endif #endif // #ifndef fpcheck -void ToGLSL::DeclareConstBufferShaderVariable(const char* varName, const struct ShaderVarType* psType, const struct ConstantBuffer* psCBuf, int unsizedArray, bool addUniformPrefix) +static bool UseReflection(HLSLCrossCompilerContext* psContext) +{ + return !psContext->IsSwitch() && psContext->psShader->eShaderType != COMPUTE_SHADER; +} + +static SHADER_VARIABLE_TYPE TypeToReport(SHADER_VARIABLE_TYPE type) +{ + switch (type) + { + case SVT_BOOL: + case SVT_INT: + case SVT_UINT: + case SVT_UINT8: + case SVT_FORCED_INT: + case SVT_INT_AMBIGUOUS: + case SVT_INT16: + case SVT_INT12: + case SVT_UINT16: + return SVT_UINT; + + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + return SVT_FLOAT; + + default: + return type; + } +} + +static void GenerateUnsupportedFormatWarning(HLSLccReflection& refl, const char* name) +{ + std::ostringstream oss; + oss << "The resource '" << name << "' uses an unsupported type/format"; + refl.OnDiagnostics(oss.str(), -1, false); +} + +static void GenerateUnsupportedReadWriteFormatWarning(HLSLccReflection& refl, const char* name) +{ + std::ostringstream oss; + oss << "The resource '" << name << "' uses an unsupported type/format for read/write access"; + refl.OnDiagnostics(oss.str(), -1, false); +} + +void ToGLSL::DeclareConstBufferShaderVariable(const char* varName, const struct ShaderVarType* psType, const struct ConstantBuffer* psCBuf, int unsizedArray, bool addUniformPrefix, 
bool reportInReflection) { bstring glsl = *psContext->currentGLSLString; + if (reportInReflection && !psContext->IsVulkan() && psType->Class != SVC_STRUCT && UseReflection(psContext)) + { + const bool isMatrix = psType->Class == SVC_MATRIX_COLUMNS || psType->Class == SVC_MATRIX_ROWS; + const SHADER_VARIABLE_TYPE type = TypeToReport(psType->Type); + psContext->m_Reflection.OnConstant(varName, 0, type, psType->Rows, psType->Columns, isMatrix, psType->Elements, true); + } + if (psType->Class == SVC_STRUCT) { bformata(glsl, "\t%s%s_Type %s", addUniformPrefix ? "UNITY_UNIFORM " : "", varName, varName); @@ -117,13 +168,21 @@ void ToGLSL::PreDeclareStructType(const std::string &name, const struct ShaderVa //Not supported at the moment ASSERT(name != "$Element"); + for (size_t i = 0; i < m_DefinedStructs.size(); ++i) + { + if (m_DefinedStructs[i] == name) + return; + } + + m_DefinedStructs.push_back(name); + bformata(glsl, "struct %s_Type {\n", name.c_str()); for (i = 0; i < psType->MemberCount; ++i) { ASSERT(psType->Members.size() != 0); - DeclareConstBufferShaderVariable(psType->Members[i].name.c_str(), &psType->Members[i], NULL, 0); + DeclareConstBufferShaderVariable(psType->Members[i].name.c_str(), &psType->Members[i], NULL, 0, false, false); } bformata(glsl, "};\n"); @@ -241,6 +300,8 @@ static void DeclareInput( std::string locationQualifier = ""; + bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); + if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) || ((psContext->flags & HLSLCC_FLAG_NVN_TARGET) && HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions))) { @@ -257,7 +318,7 @@ static void DeclareInput( if (addLocation) { std::ostringstream oss; - oss << "layout(location = " << psContext->psDependencies->GetVaryingLocation(std::string(InputName), psShader->eShaderType, true) << ") "; + oss << "layout(location = " << 
psContext->psDependencies->GetVaryingLocation(std::string(InputName), psShader->eShaderType, true, keepLocation, psShader->maxSemanticIndex) << ") "; locationQualifier = oss.str(); } } @@ -267,7 +328,7 @@ static void DeclareInput( // Do the reflection report on vertex shader inputs if (psShader->eShaderType == VERTEX_SHADER) { - psContext->m_Reflection.OnInputBinding(std::string(InputName), psContext->psDependencies->GetVaryingLocation(std::string(InputName), VERTEX_SHADER, true)); + psContext->m_Reflection.OnInputBinding(std::string(InputName), psContext->psDependencies->GetVaryingLocation(std::string(InputName), VERTEX_SHADER, true, keepLocation, psShader->maxSemanticIndex)); } switch (eIndexDim) @@ -387,7 +448,7 @@ void ToGLSL::AddBuiltinOutput(const Declaration* psDecl, int arrayElements, cons if (IsESLanguage(psShader->eTargetLanguage)) psContext->RequireExtension("GL_EXT_clip_cull_distance"); else if (eSpecialName == NAME_CULL_DISTANCE) - psContext->RequireExtension("GL_ARB_cull_distance"); // TODO: it is builtin in GLSL 4.5 (should we care?) + psContext->RequireExtension("GL_ARB_cull_distance"); const char* glName = eSpecialName == NAME_CLIP_DISTANCE ? "Clip" : "Cull"; int applySwizzle = psDecl->asOperands[0].GetNumSwizzleElements() > 1 ? 
1 : 0; @@ -744,7 +805,7 @@ void ToGLSL::AddUserOutput(const Declaration* psDecl) strncpy(OutputName, (char *)oname->data, 512); bdestroy(oname); - if (psShader->eShaderType == VERTEX_SHADER) + if (psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == GEOMETRY_SHADER) { if (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || psSignature->eComponentType == INOUT_COMPONENT_SINT32) // GLSL spec requires that integer vertex outputs always have "flat" interpolation @@ -759,7 +820,8 @@ void ToGLSL::AddUserOutput(const Declaration* psDecl) if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage)) { - bformata(glsl, "layout(location = %d) ", psContext->psDependencies->GetVaryingLocation(std::string(OutputName), psShader->eShaderType, false)); + bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); + bformata(glsl, "layout(location = %d) ", psContext->psDependencies->GetVaryingLocation(std::string(OutputName), psShader->eShaderType, false, keepLocation, psShader->maxSemanticIndex)); } if (InOutSupported(psContext->psShader->eTargetLanguage)) @@ -791,6 +853,27 @@ void ToGLSL::AddUserOutput(const Declaration* psDecl) } } +void ToGLSL::ReportStruct(const std::string &name, const struct ShaderVarType* psType) +{ + if (psContext->IsVulkan() || psContext->IsSwitch() || psType->Class != SVC_STRUCT) + return; + + for (uint32_t i = 0; i < psType->MemberCount; ++i) + { + if (psType->Members[i].Class == SVC_STRUCT) + ReportStruct(psType->Members[i].name, &psType->Members[i]); + } + + for (uint32_t i = 0; i < psType->MemberCount; ++i) + { + const bool isMatrix = psType->Members[i].Class == SVC_MATRIX_COLUMNS || psType->Members[i].Class == SVC_MATRIX_ROWS; + const SHADER_VARIABLE_TYPE type = TypeToReport(psType->Members[i].Type); + psContext->m_Reflection.OnConstant(psType->Members[i].fullName.c_str(), 0, type, psType->Members[i].Rows, psType->Members[i].Columns, isMatrix, psType->Members[i].Elements, true); + } + + 
psContext->m_Reflection.OnConstant(psType->fullName.c_str(), 0, SVT_VOID, psType->Rows, psType->Columns, false, psType->Elements, true); +} + void ToGLSL::DeclareUBOConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, bstring glsl) { uint32_t i; @@ -843,26 +926,48 @@ void ToGLSL::DeclareUBOConstants(const uint32_t ui32BindingPoint, const Constant } if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) - bformata(glsl, "#ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS\n#define UNITY_UNIFORM\n"); + bformata(glsl, "#if HLSLCC_ENABLE_UNIFORM_BUFFERS\n"); + + uint32_t slot = 0xffffffff; + bool isKnown = true; /* [layout (location = X)] uniform vec4 HLSLConstantBufferName[numConsts]; */ if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) { GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(cbName, false, 1); - bformata(glsl, "layout(set = %d, binding = %d, std140) ", binding.first, binding.second); + bformata(glsl, "layout(set = %d, binding = %d, std140) ", binding.set, binding.binding); } else { - if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) - bformata(glsl, "layout(binding = %d, std140) ", ui32BindingPoint); + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags) || (psContext->flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS)) + { + GLSLCrossDependencyData::GLSLBufferBindPointInfo bindPointInfo = psContext->psDependencies->GetGLSLResourceBinding(cbName, GLSLCrossDependencyData::BufferType_UBO); + isKnown = bindPointInfo.known; + slot = bindPointInfo.slot; + bformata(glsl, "UNITY_BINDING(%d) ", slot); + } else bcatcstr(glsl, "layout(std140) "); + + if (slot != 0xffffffff && !isKnown && UseReflection(psContext)) + { + psContext->m_Reflection.OnConstantBuffer(cbName, psCBuf->ui32TotalSizeInBytes, psCBuf->GetMemberCount(skipUnused)); + for (i = 0; i < 
psCBuf->asVars.size(); ++i) + { + if (skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) + continue; + + ReportStruct(psCBuf->asVars[i].name, &psCBuf->asVars[i].sType); + } + } } + const bool reportInReflection = slot != 0xffffffff && !isKnown && UseReflection(psContext); + bformata(glsl, "uniform %s {\n", cbName.c_str()); if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) - bformata(glsl, "#else\n#define UNITY_UNIFORM uniform\n#endif\n"); + bformata(glsl, "#endif\n"); for (i = 0; i < psCBuf->asVars.size(); ++i) { @@ -870,12 +975,11 @@ void ToGLSL::DeclareUBOConstants(const uint32_t ui32BindingPoint, const Constant continue; DeclareConstBufferShaderVariable(psCBuf->asVars[i].name.c_str(), - &psCBuf->asVars[i].sType, psCBuf, 0, psContext->flags & HLSLCC_FLAG_WRAP_UBO ? true : false); + &psCBuf->asVars[i].sType, psCBuf, 0, psContext->flags & HLSLCC_FLAG_WRAP_UBO ? true : false, reportInReflection); } if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) - bformata(glsl, "#ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS\n"); - + bformata(glsl, "#if HLSLCC_ENABLE_UNIFORM_BUFFERS\n"); if (psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) { @@ -886,7 +990,10 @@ void ToGLSL::DeclareUBOConstants(const uint32_t ui32BindingPoint, const Constant bcatcstr(glsl, "};\n"); if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) - bformata(glsl, "#endif\n#undef UNITY_UNIFORM\n"); + bformata(glsl, "#endif\n"); + + if (reportInReflection) + psContext->m_Reflection.OnConstantBufferBinding(cbName, slot); } bool DeclareRWStructuredBufferTemplateTypeAsInteger(HLSLCrossCompilerContext* psContext, const Operand* psOperand) @@ -924,8 +1031,6 @@ static void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, uint32_t const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; bstring BufNamebstr = bfromcstr(""); // Use original HLSL bindings for UAVs only. For non-UAV buffers we have resolved new binding points from the same register space. 
- if (!isUAV && !isVulkan) - ui32BindingPoint = psContext->psShader->aui32StructuredBufferBindingPoints[psContext->psShader->ui32CurrentStructuredBufferIndex++]; ResourceName(BufNamebstr, psContext, isUAV ? RGROUP_UAV : RGROUP_TEXTURE, psOperand->ui32RegisterNumber, 0); @@ -943,14 +1048,19 @@ static void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, uint32_t bformata(glsl, " struct %s_type {\n\t%s[%d] value;\n};\n\n", BufName.c_str(), typeStr, stride / 4); } + uint32_t slot = 0xffffffff; + bool isKnown = true; if (isVulkan) { GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(BufName); - bformata(glsl, "layout(set = %d, binding = %d, std430) ", binding.first, binding.second); + bformata(glsl, "layout(set = %d, binding = %d, std430) ", binding.set, binding.binding); } else { - bformata(glsl, "layout(std430, binding = %d) ", ui32BindingPoint); + GLSLCrossDependencyData::GLSLBufferBindPointInfo bindPointInfo = psContext->psDependencies->GetGLSLResourceBinding(BufName, isUAV ? GLSLCrossDependencyData::BufferType_ReadWrite : GLSLCrossDependencyData::BufferType_SSBO); + slot = bindPointInfo.slot; + isKnown = bindPointInfo.known; + bformata(glsl, "layout(std430, binding = %d) ", slot); } if (ui32GloballyCoherentAccess & GLOBALLY_COHERENT_ACCESS) @@ -959,7 +1069,8 @@ static void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, uint32_t if (!isUAV) bcatcstr(glsl, "readonly "); - bformata(glsl, "buffer %s {\n\t", BufName.c_str()); + // For Nintendo Switch, adds a "decoration" to get around not being able to detect readonly modifier on the SSBO via the platform shader reflection API. + bformata(glsl, "buffer %s%s {\n\t", psContext->IsSwitch() && !isUAV ? 
"hlslcc_readonly" : "", BufName.c_str()); if (hasEmbeddedCounter) bformata(glsl, "coherent uint %s_counter;\n\t", BufName.c_str()); @@ -975,6 +1086,9 @@ static void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, uint32_t bformata(glsl, "%s_type", BufName.c_str()); bformata(glsl, " %s_buf[];\n};\n", BufName.c_str()); + + if (!isKnown && slot != 0xffffffff && UseReflection(psContext)) + psContext->m_Reflection.OnBufferBinding(BufName, slot, isUAV); } void ToGLSL::DeclareStructConstants(const uint32_t ui32BindingPoint, @@ -1009,7 +1123,7 @@ void ToGLSL::DeclareStructConstants(const uint32_t ui32BindingPoint, ASSERT(0); // Catch this to see what's going on std::string bname = "wut"; GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(bname); - bformata(glsl, "layout(set = %d, binding = %d) ", binding.first, binding.second); + bformata(glsl, "layout(set = %d, binding = %d) ", binding.set, binding.binding); } else { @@ -1023,6 +1137,26 @@ void ToGLSL::DeclareStructConstants(const uint32_t ui32BindingPoint, bcatcstr(glsl, "_Type {\n"); } + else + { + if (psCBuf->name == "$Globals") + { + // GLSL needs to report $Globals in reflection so that SRP batcher can properly determine if the shader is compatible with it or not. 
+ if (UseReflection(psContext) && !psContext->IsVulkan()) + { + size_t memberCount = 0; + for (i = 0; i < psCBuf->asVars.size(); ++i) + { + if (!psCBuf->asVars[i].sType.m_IsUsed) + continue; + + memberCount += psCBuf->asVars[i].sType.GetMemberCount(); + } + + psContext->m_Reflection.OnConstantBuffer(psCBuf->name, 0, memberCount); + } + } + } for (i = 0; i < psCBuf->asVars.size(); ++i) { @@ -1032,7 +1166,7 @@ void ToGLSL::DeclareStructConstants(const uint32_t ui32BindingPoint, if (!useGlobalsStruct) bcatcstr(glsl, "uniform "); - DeclareConstBufferShaderVariable(psCBuf->asVars[i].name.c_str(), &psCBuf->asVars[i].sType, psCBuf, 0); + DeclareConstBufferShaderVariable(psCBuf->asVars[i].name.c_str(), &psCBuf->asVars[i].sType, psCBuf, 0, false, true); } if (useGlobalsStruct) @@ -1206,6 +1340,50 @@ static const char* GetVulkanTextureType(HLSLCrossCompilerContext* psContext, return "texture2D"; } +static HLSLCC_TEX_DIMENSION GetTextureDimension(HLSLCrossCompilerContext* psContext, + const RESOURCE_DIMENSION eDimension, + const uint32_t ui32RegisterNumber) +{ + const ResourceBinding* psBinding = 0; + RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; + int found; + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); + if (found) + { + eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; + } + + switch (eDimension) + { + case RESOURCE_DIMENSION_BUFFER: + case RESOURCE_DIMENSION_TEXTURE1D: + return eType == RETURN_TYPE_SINT || eType == RETURN_TYPE_UINT ? 
TD_INT : TD_FLOAT; + + case RESOURCE_DIMENSION_TEXTURE2D: + case RESOURCE_DIMENSION_TEXTURE2DMS: + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + return TD_2D; + + case RESOURCE_DIMENSION_TEXTURE3D: + return TD_3D; + + case RESOURCE_DIMENSION_TEXTURECUBE: + return TD_CUBE; + + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + return TD_2DARRAY; + + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + return TD_CUBEARRAY; + default: + ASSERT(0); + break; + } + + return TD_2D; +} + // Not static because this is used in toGLSLInstruction.cpp when sampling Vulkan textures const char* GetSamplerType(HLSLCrossCompilerContext* psContext, const RESOURCE_DIMENSION eDimension, @@ -1337,6 +1515,8 @@ const char* GetSamplerType(HLSLCrossCompilerContext* psContext, case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: { + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + psContext->RequireExtension("GL_OES_texture_storage_multisample_2d_array"); switch (eType) { case RETURN_TYPE_SINT: @@ -1405,7 +1585,7 @@ static void TranslateVulkanResource(HLSLCrossCompilerContext* psContext, const D psDecl->asOperands[0].ui32RegisterNumber); GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(tname); - bformata(glsl, "layout(set = %d, binding = %d) ", binding.first, binding.second); + bformata(glsl, "layout(set = %d, binding = %d) ", binding.set, binding.binding); bcatcstr(glsl, "uniform "); bcatcstr(glsl, samplerPrecision); bcatcstr(glsl, samplerTypeName); @@ -1476,20 +1656,6 @@ static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const } } - if (samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) - { - //Create shadow and non-shadow sampler. - //HLSL does not have separate types for depth compare, just different functions. 
- std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 1); - - bcatcstr(glsl, "uniform "); - bcatcstr(glsl, samplerPrecision); - bcatcstr(glsl, samplerTypeName); - bcatcstr(glsl, "Shadow "); - bcatcstr(glsl, tname.c_str()); - bcatcstr(glsl, ";\n"); - } - std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 0); bcatcstr(glsl, "uniform "); @@ -1498,6 +1664,26 @@ static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const bcatcstr(glsl, " "); bcatcstr(glsl, tname.c_str()); bcatcstr(glsl, ";\n"); + + if (samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) + { + //Create shadow and non-shadow sampler. + //HLSL does not have separate types for depth compare, just different functions. + std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 1); + + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags) || + ((psContext->flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS) && ((psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS))) + { + GLSLCrossDependencyData::GLSLBufferBindPointInfo slotInfo = psContext->psDependencies->GetGLSLResourceBinding(tname, GLSLCrossDependencyData::BufferType_Texture); + bformata(glsl, "UNITY_LOCATION(%d) ", slotInfo.slot); + } + bcatcstr(glsl, "uniform "); + bcatcstr(glsl, samplerPrecision); + bcatcstr(glsl, samplerTypeName); + bcatcstr(glsl, "Shadow "); + bcatcstr(glsl, tname.c_str()); + bcatcstr(glsl, ";\n"); + } } void ToGLSL::HandleInputRedirect(const Declaration *psDecl, const char *Precision) @@ -1722,12 +1908,20 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) case NAME_RENDER_TARGET_ARRAY_INDEX: { AddBuiltinOutput(psDecl, 0, "gl_Layer"); - if (psShader->eShaderType == VERTEX_SHADER) + if (psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == 
HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) { if (psContext->IsVulkan()) + { psContext->RequireExtension("GL_ARB_shader_viewport_layer_array"); - else + } + else if (psContext->IsSwitch()) + { + psContext->RequireExtension("GL_NV_viewport_array2"); + } + else if (psShader->eShaderType == VERTEX_SHADER) // case 1261150 + { psContext->RequireExtension("GL_AMD_vertex_shader_layer"); + } } break; @@ -1890,20 +2084,17 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) { break; } - // ...or control points - if ((psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT) && psContext->psShader->eShaderType == HULL_SHADER) - { - break; - } - // Also skip position input to domain shader - if ((psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT) && psContext->psShader->eShaderType == DOMAIN_SHADER) + // Also skip position input to hull and domain shader + if ((psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT) && + (psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER)) { const ShaderInfo::InOutSignature *psIn = NULL; psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn); ASSERT(psIn != NULL); - if ((psIn->semanticName == "SV_POSITION" || psIn->semanticName == "POS") && psIn->ui32SemanticIndex == 0) + if ((psIn->semanticName == "SV_POSITION" || psIn->semanticName == "SV_Position" + || psIn->semanticName == "POS" || psIn->semanticName == "POSITION") && psIn->ui32SemanticIndex == 0) break; } @@ -1915,6 +2106,18 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) inputName = psContext->GetDeclaredInputName(psOperand, NULL, 1, NULL); + // In the case of the Hull Shader, due to the different phases, we might have already delcared this input + // so check to see if that is the case, and if not record it + if (psContext->psShader->eShaderType == HULL_SHADER) + { + if 
(psContext->psDependencies->IsHullShaderInputAlreadyDeclared(inputName)) + { + return; + } + + psContext->psDependencies->RecordHullShaderInput(inputName); + } + if (InOutSupported(psContext->psShader->eTargetLanguage)) { if (psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT && psContext->psShader->eShaderType == DOMAIN_SHADER) @@ -2163,16 +2366,11 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) if (WriteToFragData(psContext->psShader->eTargetLanguage)) { - if (haveFramebufferFetch) - { - bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); - bformata(glsl, "#define vs_%s gl_LastFragData[%d]\n", OutputName, renderTarget); - bcatcstr(glsl, "#else\n"); - bformata(glsl, "#define vs_%s gl_FragData[%d]\n", OutputName, renderTarget); - bcatcstr(glsl, "#endif\n"); - } - else - bformata(glsl, "#define vs_%s gl_FragData[%d]\n", OutputName, renderTarget); + bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); + bformata(glsl, "#define vs_%s gl_LastFragData[%d]\n", OutputName, renderTarget); + bcatcstr(glsl, "#else\n"); + bformata(glsl, "#define vs_%s gl_FragData[%d]\n", OutputName, renderTarget); + bcatcstr(glsl, "#endif\n"); } else { @@ -2204,16 +2402,11 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) auto lq = bstr2cstr(layoutQualifier, '\0'); - if (haveFramebufferFetch) - { - bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); - bformata(glsl, "%sinout %s %s %s;\n", lq, Precision, type->data, OutputName); - bcatcstr(glsl, "#else\n"); - bformata(glsl, "%sout %s %s %s;\n", lq, Precision, type->data, OutputName); - bcatcstr(glsl, "#endif\n"); - } - else - bformata(glsl, "%sout %s %s %s;\n", lq, Precision, type->data, OutputName); + bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); + bformata(glsl, "%sinout %s %s %s;\n", lq, Precision, type->data, OutputName); + bcatcstr(glsl, "#else\n"); + bformata(glsl, "%sout %s %s %s;\n", lq, Precision, type->data, OutputName); + bcatcstr(glsl, "#endif\n"); bcstrfree(lq); 
bdestroy(layoutQualifier); @@ -2234,29 +2427,98 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) uint32_t i = 0; const uint32_t ui32NumTemps = psDecl->value.ui32NumTemps; bool usePrecision = (HavePrecisionQualifiers(psContext) != 0); + // Default values for temp variables allow avoiding Switch shader compiler incorrect warnings + // related to potential use of uninitialized variables (false-positives from compiler). + bool useDefaultInit = psContext->IsSwitch(); for (i = 0; i < ui32NumTemps; i++) { - if (psShader->psFloatTempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i], usePrecision), i); - if (psShader->psFloat16TempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i], usePrecision), i); - if (psShader->psFloat10TempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "10_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i], usePrecision), i); - if (psShader->psIntTempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i], usePrecision), i); - if (psShader->psInt16TempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i], usePrecision), i); - if (psShader->psInt12TempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i12_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i], usePrecision), i); - if (psShader->psUIntTempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i], usePrecision), i); - if (psShader->psUInt16TempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX 
"u16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i], usePrecision), i); - if (psShader->fp64 && (psShader->psDoubleTempSizes[i] != 0)) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "d%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_DOUBLE, psShader->psDoubleTempSizes[i], usePrecision), i); - if (psShader->psBoolTempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "b%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_BOOL, psShader->psBoolTempSizes[i], usePrecision), i); + if (useDefaultInit) + { + if (psShader->psFloatTempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->psFloat16TempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "16_%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->psFloat10TempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "10_%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->psIntTempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i], usePrecision); + const char* 
constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->psInt16TempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i16_%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->psInt12TempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i12_%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->psUIntTempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->psUInt16TempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u16_%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->fp64 && (psShader->psDoubleTempSizes[i] != 0)) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, 
SVT_DOUBLE, psShader->psDoubleTempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_DOUBLE, psShader->psDoubleTempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "d%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->psBoolTempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_BOOL, psShader->psBoolTempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_BOOL, psShader->psBoolTempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "b%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + } + else + { + if (psShader->psFloatTempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i], usePrecision), i); + if (psShader->psFloat16TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i], usePrecision), i); + if (psShader->psFloat10TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "10_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i], usePrecision), i); + if (psShader->psIntTempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i], usePrecision), i); + if (psShader->psInt16TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i], usePrecision), i); + if (psShader->psInt12TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i12_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i], usePrecision), i); + if (psShader->psUIntTempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u%d;\n", 
HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i], usePrecision), i); + if (psShader->psUInt16TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i], usePrecision), i); + if (psShader->fp64 && (psShader->psDoubleTempSizes[i] != 0)) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "d%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_DOUBLE, psShader->psDoubleTempSizes[i], usePrecision), i); + if (psShader->psBoolTempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "b%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_BOOL, psShader->psBoolTempSizes[i], usePrecision), i); + } } break; } @@ -2276,10 +2538,23 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) // We don't have a original resource name, maybe generate one??? if (!psCBuf) { - if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) - bformata(glsl, "layout(location = %d) ", ui32BindingPoint); + char name[24]; + sprintf(name, "ConstantBuffer%d", ui32BindingPoint); - bformata(glsl, "layout(std140) uniform ConstantBuffer%d {\n\tvec4 data[%d];\n} cb%d;\n", ui32BindingPoint, psOperand->aui32ArraySizes[1], ui32BindingPoint); + GLSLCrossDependencyData::GLSLBufferBindPointInfo bindPointInfo = psContext->IsVulkan() ? 
+ GLSLCrossDependencyData::GLSLBufferBindPointInfo{ ui32BindingPoint, true } : psContext->psDependencies->GetGLSLResourceBinding(name, GLSLCrossDependencyData::BufferType_Constant); + + bool isKnown = bindPointInfo.known; + uint32_t actualBindingPoint = bindPointInfo.slot; + + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags) || (psContext->flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS)) + { + if (!psContext->IsVulkan() && !isKnown && UseReflection(psContext)) + psContext->m_Reflection.OnConstantBufferBinding(name, actualBindingPoint); + bformata(glsl, "UNITY_LOCATION(%d) ", actualBindingPoint); + } + + bformata(glsl, "layout(std140) uniform %s {\n\tvec4 data[%d];\n} cb%d;\n", name, psOperand->aui32ArraySizes[1], ui32BindingPoint); break; } @@ -2294,7 +2569,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) char ty = psCBuf->name[20]; int idx = psCBuf->name[22] - '0'; bool isMS = false; - std::pair binding = psContext->psDependencies->GetVulkanResourceBinding((std::string &)psCBuf->name, false, 2); + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding((std::string &)psCBuf->name, false, 2); bool declared = false; for (std::vector::const_iterator itr = psCBuf->asVars.begin(); itr != psCBuf->asVars.end(); itr++) @@ -2307,31 +2582,31 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) switch (ty) { case 'f': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform highp subpassInput %s;\n", idx, binding.first, binding.second, sv.name.c_str()); + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform highp subpassInput %s;\n", idx, binding.set, binding.binding, sv.name.c_str()); break; case 'h': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform mediump subpassInput %s;\n", idx, binding.first, binding.second, 
sv.name.c_str()); + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform mediump subpassInput %s;\n", idx, binding.set, binding.binding, sv.name.c_str()); break; case 'i': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform isubpassInput %s;\n", idx, binding.first, binding.second, sv.name.c_str()); + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform isubpassInput %s;\n", idx, binding.set, binding.binding, sv.name.c_str()); break; case 'u': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform usubpassInput %s;\n", idx, binding.first, binding.second, sv.name.c_str()); + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform usubpassInput %s;\n", idx, binding.set, binding.binding, sv.name.c_str()); break; case 'F': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform highp subpassInputMS %s;\n", idx, binding.first, binding.second, sv.name.substr(0, 16).c_str()); + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform highp subpassInputMS %s;\n", idx, binding.set, binding.binding, sv.name.substr(0, 16).c_str()); isMS = true; break; case 'H': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform mediump subpassInputMS %s;\n", idx, binding.first, binding.second, sv.name.substr(0, 16).c_str()); + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform mediump subpassInputMS %s;\n", idx, binding.set, binding.binding, sv.name.substr(0, 16).c_str()); isMS = true; break; case 'I': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform isubpassInputMS %s;\n", idx, binding.first, binding.second, sv.name.substr(0, 16).c_str()); + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform isubpassInputMS %s;\n", idx, binding.set, binding.binding, 
sv.name.substr(0, 16).c_str()); isMS = true; break; case 'U': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform usubpassInputMS %s;\n", idx, binding.first, binding.second, sv.name.substr(0, 16).c_str()); + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform usubpassInputMS %s;\n", idx, binding.set, binding.binding, sv.name.substr(0, 16).c_str()); isMS = true; break; default: @@ -2388,6 +2663,12 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } } + if (IsPreTransformConstantBufferName(psCBuf->name.c_str())) + { + m_NeedUnityPreTransformDecl = true; + break; // Break out so we don't actually declare this cbuffer + } + if (psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) { if (psContext->flags & HLSLCC_FLAG_GLOBAL_CONSTS_NEVER_IN_UBO && psCBuf->name[0] == '$') @@ -2416,15 +2697,23 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) break; } - if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags) || + ((psContext->flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS) && ((psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS))) { - // Explicit layout bindings are not currently compatible with combined texture samplers. The layout below assumes there is exactly one GLSL sampler - // for each HLSL texture declaration, but when combining textures+samplers, there can be multiple OGL samplers for each HLSL texture declaration. 
- if ((psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) + std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 0); + GLSLCrossDependencyData::GLSLBufferBindPointInfo slotInfo = psContext->psDependencies->GetGLSLResourceBinding(tname, GLSLCrossDependencyData::BufferType_Texture); + + bformata(glsl, "UNITY_LOCATION(%d) ", slotInfo.slot); + if (!slotInfo.known && UseReflection(psContext)) { - //Constant buffer locations start at 0. Resource locations start at ui32NumConstantBuffers. - bformata(glsl, "layout(location = %d) ", - psContext->psShader->sInfo.psConstantBuffers.size() + psDecl->asOperands[0].ui32RegisterNumber); + const RESOURCE_DIMENSION dim = psDecl->value.eResourceDimension; + if (dim == RESOURCE_DIMENSION_BUFFER) + psContext->m_Reflection.OnBufferBinding(tname, slotInfo.slot, false); + else + { + bool isMSAATex = (dim == RESOURCE_DIMENSION_TEXTURE2DMS) || (dim == RESOURCE_DIMENSION_TEXTURE2DMSARRAY); + psContext->m_Reflection.OnTextureBinding(tname, slotInfo.slot, slotInfo.slot, isMSAATex, GetTextureDimension(psContext, dim, psDecl->asOperands[0].ui32RegisterNumber), false); + } } } @@ -2442,51 +2731,26 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) bcatcstr(glsl, ";\n"); break; } + case RESOURCE_DIMENSION_TEXTURE1D: - { - TranslateResourceTexture(psContext, psDecl, 1); - break; - } case RESOURCE_DIMENSION_TEXTURE2D: - { - TranslateResourceTexture(psContext, psDecl, 1); - break; - } - case RESOURCE_DIMENSION_TEXTURE2DMS: - { - TranslateResourceTexture(psContext, psDecl, 0); - break; - } - case RESOURCE_DIMENSION_TEXTURE3D: - { - TranslateResourceTexture(psContext, psDecl, 0); - break; - } case RESOURCE_DIMENSION_TEXTURECUBE: - { - TranslateResourceTexture(psContext, psDecl, 1); - break; - } case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - TranslateResourceTexture(psContext, psDecl, 1); - break; - } case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - 
TranslateResourceTexture(psContext, psDecl, 1); - break; - } - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - TranslateResourceTexture(psContext, psDecl, 0); - break; - } case RESOURCE_DIMENSION_TEXTURECUBEARRAY: { TranslateResourceTexture(psContext, psDecl, 1); break; } + + case RESOURCE_DIMENSION_TEXTURE2DMS: + case RESOURCE_DIMENSION_TEXTURE3D: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + TranslateResourceTexture(psContext, psDecl, 0); + break; + } + default: ASSERT(0); break; @@ -2501,7 +2765,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) // Need extra check from signature: const ShaderInfo::InOutSignature *sig = NULL; psShader->sInfo.GetOutputSignatureFromRegister(0, psDecl->asOperands->GetAccessMask(), 0, &sig, true); - if (!sig || sig->semanticName == "POSITION" || sig->semanticName == "POS") + if (!sig || sig->semanticName == "POSITION" || sig->semanticName == "POS" || sig->semanticName == "SV_Position") { needsDeclare = false; AddBuiltinOutput(psDecl, 0, "gl_out[gl_InvocationID].gl_Position"); @@ -2523,10 +2787,18 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) bcatcstr(glsl, "layout(early_fragment_tests) in;\n"); psShader->sInfo.bEarlyFragmentTests = true; } - if (!(ui32Flags & GLOBAL_FLAG_REFACTORING_ALLOWED)) + if ((ui32Flags & GLOBAL_FLAG_REFACTORING_ALLOWED) && HavePreciseQualifier(psContext->psShader->eTargetLanguage)) { - //TODO add precise - //HLSL precise - http://msdn.microsoft.com/en-us/library/windows/desktop/hh447204(v=vs.85).aspx + static const char * const types[] = + { + "vec4", "ivec4", "bvec4", "uvec4" + }; + + for (int i = 0; i < sizeof(types) / sizeof(types[0]); ++i) + { + char const * t = types[i]; + bformata(glsl, "precise %s u_xlat_precise_%s;\n", t, t); + } } if (ui32Flags & GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS) { @@ -3081,11 +3353,11 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_SAMPLER, 
psDecl->asOperands[0].ui32RegisterNumber, (const ResourceBinding **)&pRes); ASSERT(pRes != NULL); std::string name = ResourceName(psContext, RGROUP_SAMPLER, psDecl->asOperands[0].ui32RegisterNumber, 0); - const char *samplerPrecision = GetSamplerPrecision(psContext, pRes ? pRes->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); + const char *samplerPrecision = GetSamplerPrecision(psContext, pRes->ePrecision); GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(name); const char *samplerType = psDecl->value.eSamplerMode == D3D10_SB_SAMPLER_MODE_COMPARISON ? "samplerShadow" : "sampler"; - bformata(glsl, "layout(set = %d, binding = %d) uniform %s %s %s;\n", binding.first, binding.second, samplerPrecision, samplerType, name.c_str()); + bformata(glsl, "layout(set = %d, binding = %d) uniform %s %s %s;\n", binding.set, binding.binding, samplerPrecision, samplerType, name.c_str()); // Store the sampler mode to ShaderInfo, it's needed when we use the sampler pRes->m_SamplerMode = psDecl->value.eSamplerMode; } @@ -3108,6 +3380,10 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) bcatcstr(glsl, "coherent "); } + // Use 4 component format as a fallback if no instruction defines it + const uint32_t numComponents = psDecl->sUAV.ui32NumComponents > 0 ? psDecl->sUAV.ui32NumComponents : 4; + REFLECT_RESOURCE_PRECISION precision = REFLECT_RESOURCE_PRECISION_UNKNOWN; + if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ) && !(psContext->flags & HLSLCC_FLAG_GLES31_IMAGE_QUALIFIERS) && !isVulkan) { //Special case on desktop glsl: writeonly image does not need format qualifier @@ -3115,9 +3391,6 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } else { - // Use 4 component format as a fallback if no instruction defines it - uint32_t numComponents = psDecl->sUAV.ui32NumComponents > 0 ? 
psDecl->sUAV.ui32NumComponents : 4; - if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ)) bcatcstr(glsl, "writeonly "); else if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE)) @@ -3129,42 +3402,118 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) psContext->RequireExtension("GL_EXT_texture_buffer"); } - if (isVulkan) + if (psContext->IsSwitch() && !(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_ATOMIC)) { - std::string name = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(name); - bformata(glsl, "layout(set = %d, binding = %d, ", binding.first, binding.second); + // Switch supports the GL_EXT_shader_image_load_formatted extension but it does require being enabled. + // Allows imageLoad() to do formatted reads and match the ld_uav_typed_indexable instruction. + // GL_EXT_shader_image_load_formatted doesn't provide support for imageAtomic*() functions. These still require format layout qualifier + psContext->RequireExtension("GL_EXT_shader_image_load_formatted"); + bformata(glsl, "layout(binding=%d) ", bindpoint); + switch (psDecl->sUAV.Type) + { + case RETURN_TYPE_FLOAT: + case RETURN_TYPE_UINT: + case RETURN_TYPE_SINT: + bcatcstr(glsl, "highp "); //TODO: half case? + break; + case RETURN_TYPE_UNORM: + case RETURN_TYPE_SNORM: + bcatcstr(glsl, "lowp "); + break; + default: + ASSERT(0); + } } else - bformata(glsl, "layout(binding=%d, ", bindpoint); - - //TODO: catch bad format cases. e.g. es supports only limited format set. no rgb formats on glsl - if (numComponents >= 1) - bcatcstr(glsl, "r"); - if (numComponents >= 2) - bcatcstr(glsl, "g"); - if (numComponents >= 3) - bcatcstr(glsl, "ba"); - - switch (psDecl->sUAV.Type) { - case RETURN_TYPE_FLOAT: - bcatcstr(glsl, "32f) highp "); //TODO: half case? 
- break; - case RETURN_TYPE_UNORM: - bcatcstr(glsl, "8) lowp "); - break; - case RETURN_TYPE_SNORM: - bcatcstr(glsl, "8_snorm) lowp "); - break; - case RETURN_TYPE_UINT: - bcatcstr(glsl, "32ui) highp "); //TODO: 16/8 cases? - break; - case RETURN_TYPE_SINT: - bcatcstr(glsl, "32i) highp "); //TODO: 16/8 cases? - break; - default: - ASSERT(0); + if (isVulkan) + { + std::string name = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(name); + bformata(glsl, "layout(set = %d, binding = %d, ", binding.set, binding.binding); + } + else + bformata(glsl, "layout(binding=%d, ", bindpoint); + + const ResourceBinding* psBinding = 0; + if (psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, &psBinding)) + precision = psBinding->ePrecision; + + if (psDecl->sUAV.Type == RETURN_TYPE_FLOAT && numComponents == 3 && precision == REFLECT_RESOURCE_PRECISION_LOWP) + { + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bcatcstr(glsl, "r11f_g11f_b10f) mediump "); + } + else if (psDecl->sUAV.Type == RETURN_TYPE_UNORM && numComponents == 4 && precision == REFLECT_RESOURCE_PRECISION_LOWP) + { + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bcatcstr(glsl, "rgb10_a2) mediump "); + } + else if (psDecl->sUAV.Type == RETURN_TYPE_UINT && numComponents == 4 && precision == REFLECT_RESOURCE_PRECISION_LOWP) + { + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, 
psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bcatcstr(glsl, "rgb10_a2ui) mediump "); + } + else + { + if (numComponents >= 1) + bcatcstr(glsl, "r"); + if (numComponents >= 2) + bcatcstr(glsl, "g"); + if (numComponents >= 3) + bcatcstr(glsl, "ba"); + + switch (psDecl->sUAV.Type) + { + case RETURN_TYPE_FLOAT: + { + switch (precision) + { + case REFLECT_RESOURCE_PRECISION_LOWP: + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bcatcstr(glsl, "16f) mediump "); break; + default: + if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4 && numComponents != 1) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bcatcstr(glsl, "32f) highp "); break; + } + } break; + case RETURN_TYPE_UNORM: + case RETURN_TYPE_SNORM: + { + if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bformata(glsl, "8%s) lowp ", psDecl->sUAV.Type == RETURN_TYPE_SNORM ? "_snorm" : ""); + } break; + case RETURN_TYPE_UINT: + case RETURN_TYPE_SINT: + { + const char* fmt = psDecl->sUAV.Type == RETURN_TYPE_UINT ? 
"ui" : "i"; + switch (precision) + { + case REFLECT_RESOURCE_PRECISION_LOWP: + if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bformata(glsl, "8%s) lowp ", fmt); break; + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bformata(glsl, "16%s) mediump ", fmt); break; + default: + if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4 && numComponents != 1) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bformata(glsl, "32%s) highp ", fmt); break; + } + } break; + default: + ASSERT(0); + } + } } } @@ -3178,8 +3527,12 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) { case RESOURCE_DIMENSION_BUFFER: { - if (IsESLanguage(psShader->eTargetLanguage)) + if (IsESLanguage(psShader->eTargetLanguage) || psContext->IsVulkan()) + { psContext->RequireExtension("GL_EXT_texture_buffer"); + if (numComponents != 1 || precision == REFLECT_RESOURCE_PRECISION_LOWP || precision == REFLECT_RESOURCE_PRECISION_MEDIUMP) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + } bformata(glsl, "uniform %simageBuffer ", imageTypePrefix); break; @@ -3235,6 +3588,21 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); bcatcstr(glsl, ";\n"); + + unsigned int accessFlags = 0; + if (psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ) + accessFlags |= HLSLccReflection::ReadAccess; + if 
(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) + accessFlags |= HLSLccReflection::WriteAccess; + + if (IsESLanguage(psContext->psShader->eTargetLanguage) && accessFlags == (HLSLccReflection::ReadAccess | HLSLccReflection::WriteAccess)) + { + if (numComponents != 1 || precision == REFLECT_RESOURCE_PRECISION_LOWP || precision == REFLECT_RESOURCE_PRECISION_MEDIUMP) + GenerateUnsupportedReadWriteFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + } + + psContext->m_Reflection.OnStorageImage(bindpoint, accessFlags); + break; } case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: @@ -3247,8 +3615,8 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) { std::string uavname = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); GLSLCrossDependencyData::VulkanResourceBinding uavBinding = psContext->psDependencies->GetVulkanResourceBinding(uavname, true); - GLSLCrossDependencyData::VulkanResourceBinding counterBinding = std::make_pair(uavBinding.first, uavBinding.second + 1); - bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; };\n", counterBinding.first, counterBinding.second, uavname.c_str(), uavname.c_str()); + GLSLCrossDependencyData::VulkanResourceBinding counterBinding = { uavBinding.set, uavBinding.binding + 1 }; + bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; };\n", counterBinding.set, counterBinding.binding, uavname.c_str(), uavname.c_str()); DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl); @@ -3260,13 +3628,13 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) } else { + std::string name = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + name += "_counter"; bcatcstr(glsl, "layout 
(binding = 0) uniform "); if (HavePrecisionQualifiers(psContext)) bcatcstr(glsl, "highp "); - bcatcstr(glsl, "atomic_uint "); - ResourceName(glsl, psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); - bcatcstr(glsl, "_counter; \n"); + bformata(glsl, "atomic_uint %s;\n", name.c_str()); DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl); @@ -3289,17 +3657,18 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl) { std::string uavname = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); GLSLCrossDependencyData::VulkanResourceBinding uavBinding = psContext->psDependencies->GetVulkanResourceBinding(uavname, true); - GLSLCrossDependencyData::VulkanResourceBinding counterBinding = std::make_pair(uavBinding.first, uavBinding.second + 1); - bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; };\n", counterBinding.first, counterBinding.second, uavname.c_str(), uavname.c_str()); + GLSLCrossDependencyData::VulkanResourceBinding counterBinding = { uavBinding.set, uavBinding.binding + 1 }; + bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; };\n", counterBinding.set, counterBinding.binding, uavname.c_str(), uavname.c_str()); } else { + std::string name = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + name += "_counter"; bcatcstr(glsl, "layout (binding = 0) uniform "); + if (HavePrecisionQualifiers(psContext)) bcatcstr(glsl, "highp "); - bcatcstr(glsl, "atomic_uint "); - ResourceName(glsl, psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); - bformata(glsl, "_counter; \n"); + bformata(glsl, "atomic_uint %s;\n", name.c_str()); } } @@ -3526,7 +3895,8 @@ bool ToGLSL::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::In if 
(psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE) { - if (sig->semanticName == "POS" && sig->ui32SemanticIndex == 0) + if ((sig->semanticName == "POS" || sig->semanticName == "POSITION" || sig->semanticName == "SV_POSITION" || sig->semanticName == "SV_Position") + && sig->ui32SemanticIndex == 0) { result = "gl_out[gl_InvocationID].gl_Position"; return true; @@ -3540,6 +3910,10 @@ bool ToGLSL::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::In return true; } + if ((psOperand->eType == OPERAND_TYPE_OUTPUT || psOperand->eType == OPERAND_TYPE_INPUT) + && HLSLcc::WriteMaskToComponentCount(sig->ui32Mask) == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + // TODO: Add other builtins here. if (sig->eSystemValueType == NAME_POSITION || (sig->semanticName == "POS" && sig->ui32SemanticIndex == 0 && psContext->psShader->eShaderType == VERTEX_SHADER)) { diff --git a/src/toGLSLInstruction.cpp b/src/toGLSLInstruction.cpp index a497491..c0732ff 100644 --- a/src/toGLSLInstruction.cpp +++ b/src/toGLSLInstruction.cpp @@ -26,7 +26,7 @@ bool DeclareRWStructuredBufferTemplateTypeAsInteger(HLSLCrossCompilerContext* ps // and pSrcCount will be filled with the number of components expected // ui32CompMask can be used to only write to 1 or more components (used by MOVC) void ToGLSL::AddOpAssignToDestWithMask(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask) + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int *pNeedsParenthesis, uint32_t ui32CompMask) { uint32_t ui32DestElementCount = psDest->GetNumSwizzleElements(ui32CompMask); bstring glsl = *psContext->currentGLSLString; @@ -37,6 +37,34 @@ void ToGLSL::AddOpAssignToDestWithMask(const Operand* psDest, TranslateOperand(psDest, TO_FLAG_DESTINATION, ui32CompMask); + bcatcstr(glsl, " = "); + + if (precise && 
HavePreciseQualifier(psContext->psShader->eTargetLanguage)) + { + char const *t, *s; + switch (eDestDataType) + { + case SVT_BOOL: t = "bvec4"; break; + case SVT_INT: t = "ivec4"; break; + case SVT_FLOAT: t = "vec4"; break; + case SVT_UINT: t = "uvec4"; break; + default: ASSERT(0); t = NULL; break; + } + switch (ui32DestElementCount) + { + case 1: s = ".x"; break; + case 2: s = ".xy"; break; + case 3: s = ".xyz"; break; + case 4: s = ".xyzw"; break; + default: ASSERT(0); s = NULL; break; + } + if (t && s) + { + bformata(glsl, "(u_xlat_precise_%s%s = (", t, s); + (*pNeedsParenthesis) += 2; + } + } + // Simple path: types match. if (DoAssignmentDataTypesMatch(eDestDataType, eSrcType)) { @@ -44,11 +72,10 @@ void ToGLSL::AddOpAssignToDestWithMask(const Operand* psDest, // eg. MOV r0, c0.x => Temp[0] = vec4(c0.x); if (ui32DestElementCount > ui32SrcElementCount) { - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); - *pNeedsParenthesis = 1; + bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); + (*pNeedsParenthesis)++; } - else - bformata(glsl, " %s ", szAssignmentOp); + return; } @@ -61,7 +88,7 @@ void ToGLSL::AddOpAssignToDestWithMask(const Operand* psDest, ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); if (eSrcType == SVT_FLOAT && psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) { - bformata(glsl, " %s floatBitsToInt(", szAssignmentOp); + bcatcstr(glsl, "floatBitsToInt("); // Cover cases where the HLSL language expects the rest of the components to be default-filled if (ui32DestElementCount > ui32SrcElementCount) { @@ -70,7 +97,7 @@ void ToGLSL::AddOpAssignToDestWithMask(const Operand* psDest, } } else - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); + bformata(glsl, "%s(", 
GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); (*pNeedsParenthesis)++; break; @@ -79,7 +106,7 @@ void ToGLSL::AddOpAssignToDestWithMask(const Operand* psDest, ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); if (eSrcType == SVT_FLOAT && psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) { - bformata(glsl, " %s floatBitsToUint(", szAssignmentOp); + bcatcstr(glsl, "floatBitsToUint("); // Cover cases where the HLSL language expects the rest of the components to be default-filled if (ui32DestElementCount > ui32SrcElementCount) { @@ -88,7 +115,7 @@ void ToGLSL::AddOpAssignToDestWithMask(const Operand* psDest, } } else - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); + bformata(glsl, " %s(", GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); (*pNeedsParenthesis)++; break; @@ -100,9 +127,9 @@ void ToGLSL::AddOpAssignToDestWithMask(const Operand* psDest, if (psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) { if (eSrcType == SVT_INT) - bformata(glsl, " %s intBitsToFloat(", szAssignmentOp); + bcatcstr(glsl, "intBitsToFloat("); else - bformata(glsl, " %s uintBitsToFloat(", szAssignmentOp); + bcatcstr(glsl, "uintBitsToFloat("); // Cover cases where the HLSL language expects the rest of the components to be default-filled if (ui32DestElementCount > ui32SrcElementCount) { @@ -111,21 +138,24 @@ void ToGLSL::AddOpAssignToDestWithMask(const Operand* psDest, } } else - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); + bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); + (*pNeedsParenthesis)++; + break; + case SVT_BOOL: + bformata(glsl, " %s(", GetConstructorForTypeGLSL(psContext, eDestDataType, 
ui32DestElementCount, false)); (*pNeedsParenthesis)++; break; default: - // TODO: Handle bools? ASSERT(0); break; } } void ToGLSL::AddAssignToDest(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis) + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int* pNeedsParenthesis) { - AddOpAssignToDestWithMask(psDest, eSrcType, ui32SrcElementCount, "=", pNeedsParenthesis, OPERAND_4_COMPONENT_MASK_ALL); + AddOpAssignToDestWithMask(psDest, eSrcType, ui32SrcElementCount, precise, pNeedsParenthesis, OPERAND_4_COMPONENT_MASK_ALL); } void ToGLSL::AddAssignPrologue(int numParenthesis, bool isEmbedded /* = false*/) @@ -191,7 +221,7 @@ void ToGLSL::AddComparison(Instruction* psInst, ComparisonType eType, } else { - AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, &needsParenthesis); + AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, psInst->ui32PreciseMask, &needsParenthesis); bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, false)); bcatcstr(glsl, "("); @@ -253,7 +283,7 @@ void ToGLSL::AddComparison(Instruction* psInst, ComparisonType eType, bformata(glsl, "%s", glslOpcode[eType]); TranslateOperand(&psInst->asOperands[2], typeFlag); bcatcstr(glsl, "; "); - AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, &needsParenthesis); + AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, psInst->ui32PreciseMask, &needsParenthesis); bcatcstr(glsl, "!!cond ? "); if (floatResult) bcatcstr(glsl, "1.0 : 0.0"); @@ -261,7 +291,7 @@ void ToGLSL::AddComparison(Instruction* psInst, ComparisonType eType, { // Old ES3.0 Adrenos treat 0u as const int. 
// GLSL ES 2 spec: high precision ints are guaranteed to have a range of at least (-2^16, 2^16) - bcatcstr(glsl, HaveUnsignedTypes(psContext->psShader->eTargetLanguage) ? ") ? 0xFFFFFFFFu : uint(0)" : ") ? -1 : 0"); + bcatcstr(glsl, HaveUnsignedTypes(psContext->psShader->eTargetLanguage) ? "0xFFFFFFFFu : uint(0)" : "-1 : 0"); } AddAssignPrologue(needsParenthesis, true); bcatcstr(glsl, "; }\n"); @@ -279,7 +309,7 @@ void ToGLSL::AddComparison(Instruction* psInst, ComparisonType eType, } else { - AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, &needsParenthesis); + AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, psInst->ui32PreciseMask, &needsParenthesis); bcatcstr(glsl, "("); } TranslateOperand(&psInst->asOperands[1], typeFlag); @@ -303,7 +333,7 @@ void ToGLSL::AddComparison(Instruction* psInst, ComparisonType eType, } } -void ToGLSL::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, bool isEmbedded /* = false*/) +void ToGLSL::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, uint32_t precise, bool isEmbedded /* = false*/) { int numParenthesis = 0; int srcSwizzleCount = pSrc->GetNumSwizzleElements(); @@ -312,13 +342,13 @@ void ToGLSL::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, bool isEmbedded const SHADER_VARIABLE_TYPE eSrcType = pSrc->GetDataType(psContext, pDest->GetDataType(psContext)); uint32_t flags = SVTTypeToFlag(eSrcType); - AddAssignToDest(pDest, eSrcType, srcSwizzleCount, &numParenthesis); + AddAssignToDest(pDest, eSrcType, srcSwizzleCount, precise, &numParenthesis); TranslateOperand(pSrc, flags, writeMask); AddAssignPrologue(numParenthesis, isEmbedded); } -void ToGLSL::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2) +void ToGLSL::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2, uint32_t precise) { bstring glsl = *psContext->currentGLSLString; uint32_t destElemCount = 
pDest->GetNumSwizzleElements(); @@ -349,7 +379,7 @@ void ToGLSL::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand int numParenthesis = 0; SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); psContext->AddIndentation(); - AddAssignToDest(pDest, eDestType, destElemCount, &numParenthesis); + AddAssignToDest(pDest, eDestType, destElemCount, precise, &numParenthesis); bcatcstr(glsl, "("); if (s0Type == SVT_UINT || s0Type == SVT_UINT16) TranslateOperand(src0, TO_AUTO_BITCAST_TO_UINT, OPERAND_4_COMPONENT_MASK_X); @@ -388,7 +418,8 @@ void ToGLSL::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand } else { - // TODO: We can actually do this in one op using mix(). + // NOTE: mix() cannot be used to implement MOVC, because it propagates + // NaN from both endpoints. int srcElem = -1; SHADER_VARIABLE_TYPE dstType = pDest->GetDataType(psContext); SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); @@ -430,7 +461,7 @@ void ToGLSL::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand continue; psContext->AddIndentation(); - AddOpAssignToDestWithMask(pDest, eDestType, 1, "=", &numParenthesis, 1 << destElem); + AddOpAssignToDestWithMask(pDest, eDestType, 1, precise, &numParenthesis, 1 << destElem); bcatcstr(glsl, "("); if (s0Type == SVT_BOOL) { @@ -519,7 +550,7 @@ void ToGLSL::CallBinaryOp(const char* name, Instruction* psInst, if (!isEmbedded) psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], eDataType, dstSwizCount, &needsParenthesis); + AddAssignToDest(&psInst->asOperands[dest], eDataType, dstSwizCount, psInst->ui32PreciseMask, &needsParenthesis); // Adreno 3xx fails on binary ops that operate on vectors bool opComponentWiseOnAdreno = (!strcmp("&", name) || !strcmp("|", name) || !strcmp("^", name) || !strcmp(">>", name) || !strcmp("<<", name)); @@ -579,7 +610,7 @@ void ToGLSL::CallTernaryOp(const char* op1, const char* op2, Instruction* psInst psContext->AddIndentation(); - 
AddAssignToDest(&psInst->asOperands[dest], TypeFlagsToSVTType(dataType), dstSwizCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], TypeFlagsToSVTType(dataType), dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); bformata(glsl, " %s ", op1); @@ -609,7 +640,7 @@ void ToGLSL::CallHelper3(const char* name, Instruction* psInst, psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, dstSwizCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); bformata(glsl, "%s(", name); numParenthesis++; @@ -641,7 +672,7 @@ void ToGLSL::CallHelper2(const char* name, Instruction* psInst, } psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, isDotProduct ? 1 : dstSwizCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, isDotProduct ? 1 : dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); bformata(glsl, "%s(", name); numParenthesis++; @@ -672,7 +703,7 @@ void ToGLSL::CallHelper2Int(const char* name, Instruction* psInst, psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); bformata(glsl, "%s(", name); numParenthesis++; @@ -701,7 +732,7 @@ void ToGLSL::CallHelper2UInt(const char* name, Instruction* psInst, psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], SVT_UINT, dstSwizCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], SVT_UINT, dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); bformata(glsl, "%s(", name); numParenthesis++; @@ -722,7 +753,7 @@ void ToGLSL::CallHelper1(const char* name, Instruction* psInst, psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, dstSwizCount, 
&numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); bformata(glsl, "%s(", name); numParenthesis++; @@ -746,7 +777,7 @@ void ToGLSL::CallHelper1Int( psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); bformata(glsl, "%s(", name); numParenthesis++; @@ -777,7 +808,7 @@ std::string ToGLSL::GetVulkanDummySamplerName() { GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(dummySmpName); bstring code = bfromcstr(""); - bformata(code, "layout(set = %d, binding = %d) uniform mediump sampler %s;", binding.first, binding.second, dummySmpName.c_str()); + bformata(code, "layout(set = %d, binding = %d) uniform mediump sampler %s;", binding.set, binding.binding, dummySmpName.c_str()); DeclareExtraFunction(dummySmpName, code); bdestroy(code); psContext->psShader->m_DummySamplerDeclared = true; @@ -809,7 +840,7 @@ void ToGLSL::TranslateTexelFetch( } psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], TypeFlagsToSVTType(ResourceReturnTypeToFlag(psBinding->ui32ReturnType)), 4, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], TypeFlagsToSVTType(ResourceReturnTypeToFlag(psBinding->ui32ReturnType)), 4, psInst->ui32PreciseMask, &numParenthesis); if (hasOffset) bcatcstr(glsl, "texelFetchOffset("); @@ -977,7 +1008,7 @@ void ToGLSL::GetResInfoData(Instruction* psInst, int index, int destElem) } psContext->AddIndentation(); - AddOpAssignToDestWithMask(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, "=", &numParenthesis, 1 << destElem); + AddOpAssignToDestWithMask(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? 
SVT_UINT : SVT_FLOAT, 1, psInst->ui32PreciseMask, &numParenthesis, 1 << destElem); //[width, height, depth or array size, total-mip-count] if (index < 3) @@ -1201,12 +1232,22 @@ void ToGLSL::TranslateTextureSample(Instruction* psInst, SHADER_VARIABLE_TYPE dataType = psContext->psShader->sInfo.GetTextureDataType(psSrcTex->ui32RegisterNumber); psContext->AddIndentation(); - AddAssignToDest(psDest, dataType, psSrcTex->GetNumSwizzleElements(), &numParenthesis); + AddAssignToDest(psDest, dataType, psSrcTex->GetNumSwizzleElements(), psInst->ui32PreciseMask, &numParenthesis); // GLSL doesn't have textureLod() for 2d shadow samplers, we'll have to use grad instead. In that case assume LOD 0. - const bool needsLodWorkaround = (eResDim == RESOURCE_DIMENSION_TEXTURE2DARRAY) && (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE); + bool needsLodWorkaround = (eResDim == RESOURCE_DIMENSION_TEXTURE2DARRAY) && (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE); const bool needsLodWorkaroundES2 = (psContext->psShader->eTargetLanguage == LANG_ES_100 && psContext->psShader->eShaderType == PIXEL_SHADER && (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE)); + // Workaround for switch for OPCODE_SAMPLE_C_LZ, in particular sampler2dArrayShadow.SampleCmpLevelZero(). + // textureGrad() with shadow samplers is not implemented in HW on switch so the behavior is emulated using shuffles and 4 texture fetches. + // The code generated is very heavy. + // Workaround: use standard texture fetch, shadows are currently not mipmapped, so that should work for now. 
+ if (needsLodWorkaround && psContext->IsSwitch() && ui32Flags == (TEXSMP_FLAG_DEPTHCOMPARE | TEXSMP_FLAG_FIRSTLOD)) + { + needsLodWorkaround = false; + ui32Flags &= ~TEXSMP_FLAG_FIRSTLOD; + } + if (needsLodWorkaround) { bformata(glsl, "%sGrad%s(", funcName, offset); @@ -1480,7 +1521,8 @@ void ToGLSL::TranslateShaderStorageStore(Instruction* psInst) bcatcstr(glsl, "]"); uint32_t srcFlag = TO_FLAG_UNSIGNED_INTEGER; - if (DeclareRWStructuredBufferTemplateTypeAsInteger(psContext, psDest)) + if (DeclareRWStructuredBufferTemplateTypeAsInteger(psContext, psDest) && + psDest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) // group shared is uint srcFlag = TO_FLAG_INTEGER; bcatcstr(glsl, " = "); @@ -1533,8 +1575,8 @@ void ToGLSL::TranslateShaderStorageLoad(Instruction* psInst) srcOffFlag = TO_FLAG_INTEGER; psContext->AddIndentation(); - AddAssignToDest(psDest, destDataType, destCount, &numParenthesis); //TODO check this out? - if (destCount > 1) + AddAssignToDest(psDest, destDataType, destCount, psInst->ui32PreciseMask, &numParenthesis); //TODO check this out? 
+ if (destCount > 1 || destDataType == SVT_FLOAT16) { bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, destDataType, destCount, false)); numParenthesis++; @@ -1613,10 +1655,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) { case OPCODE_IMM_ATOMIC_IADD: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IADD\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IADD\n"); + } func = "Add"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1626,10 +1669,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_ATOMIC_IADD: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IADD\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IADD\n"); + } func = "Add"; dest = &psInst->asOperands[0]; destAddr = &psInst->asOperands[1]; @@ -1638,10 +1682,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_IMM_ATOMIC_AND: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_AND\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_AND\n"); + } func = "And"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1651,10 +1696,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_ATOMIC_AND: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_AND\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_AND\n"); + } func = "And"; dest = &psInst->asOperands[0]; destAddr = &psInst->asOperands[1]; @@ -1663,10 +1709,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_IMM_ATOMIC_OR: { 
-#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_OR\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_OR\n"); + } func = "Or"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1676,10 +1723,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_ATOMIC_OR: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_OR\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_OR\n"); + } func = "Or"; dest = &psInst->asOperands[0]; destAddr = &psInst->asOperands[1]; @@ -1688,10 +1736,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_IMM_ATOMIC_XOR: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_XOR\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_XOR\n"); + } func = "Xor"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1701,10 +1750,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_ATOMIC_XOR: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_XOR\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_XOR\n"); + } func = "Xor"; dest = &psInst->asOperands[0]; destAddr = &psInst->asOperands[1]; @@ -1714,10 +1764,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) case OPCODE_IMM_ATOMIC_EXCH: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_EXCH\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_EXCH\n"); + } func = "Exchange"; previousValue = 
&psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1727,10 +1778,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_IMM_ATOMIC_CMP_EXCH: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_CMP_EXC\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_CMP_EXC\n"); + } func = "CompSwap"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1741,10 +1793,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_ATOMIC_CMP_STORE: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_CMP_STORE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_CMP_STORE\n"); + } func = "CompSwap"; previousValue = 0; dest = &psInst->asOperands[0]; @@ -1755,10 +1808,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_IMM_ATOMIC_UMIN: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_UMIN\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_UMIN\n"); + } func = "Min"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1768,10 +1822,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_ATOMIC_UMIN: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_UMIN\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_UMIN\n"); + } func = "Min"; dest = &psInst->asOperands[0]; destAddr = &psInst->asOperands[1]; @@ -1780,10 +1835,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_IMM_ATOMIC_IMIN: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IMIN\n"); 
-#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IMIN\n"); + } func = "Min"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1793,10 +1849,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_ATOMIC_IMIN: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IMIN\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IMIN\n"); + } func = "Min"; dest = &psInst->asOperands[0]; destAddr = &psInst->asOperands[1]; @@ -1805,10 +1862,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_IMM_ATOMIC_UMAX: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_UMAX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_UMAX\n"); + } func = "Max"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1818,10 +1876,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_ATOMIC_UMAX: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_UMAX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_UMAX\n"); + } func = "Max"; dest = &psInst->asOperands[0]; destAddr = &psInst->asOperands[1]; @@ -1830,10 +1889,11 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_IMM_ATOMIC_IMAX: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IMAX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IMAX\n"); + } func = "Max"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1843,10 +1903,11 @@ void 
ToGLSL::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_ATOMIC_IMAX: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IMAX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IMAX\n"); + } func = "Max"; dest = &psInst->asOperands[0]; destAddr = &psInst->asOperands[1]; @@ -1909,7 +1970,7 @@ void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) ui32DataTypeFlag = TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT; if (previousValue) - AddAssignToDest(previousValue, isUint ? SVT_UINT : SVT_INT, 1, &numParenthesis); + AddAssignToDest(previousValue, isUint ? SVT_UINT : SVT_INT, 1, psInst->ui32PreciseMask, &numParenthesis); if (texDim > 0) bcatcstr(glsl, "imageAtomic"); @@ -1997,14 +2058,6 @@ void ToGLSL::TranslateConditional( statement = "return"; } - if (psInst->m_IsStaticBranch) - { - // Instead of the actual condition, use the specialization constant instead - - // But first we'll have to make sure the original values don't get dropped out (we rely on glslang not being very smart) - bcatcstr(glsl, "if(false)\n {\n"); - } - SHADER_VARIABLE_TYPE argType = psInst->asOperands[0].GetDataType(psContext); if (argType == SVT_BOOL) { @@ -2050,29 +2103,68 @@ void ToGLSL::TranslateConditional( bcatcstr(glsl, " {\n"); } } - if (psInst->m_IsStaticBranch) +} + +void ToGLSL::HandleSwitchTransformation(Instruction* psInst, bstring glsl) +{ + SwitchConversion& current = m_SwitchStack.back(); + if (psInst->eOpcode != OPCODE_CASE && current.currentCaseOperands.size() > 0) { - if (psInst->eOpcode == OPCODE_IF) + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, current.isFirstCase ? 
"if(" : "} else if("); + current.isFirstCase = false; + for (size_t i = 0; i < current.currentCaseOperands.size(); ++i) { - bcatcstr(glsl, "}\n}\n"); + if (i > 0) + bcatcstr(glsl, " || "); + + bformata(glsl, "%s == %s", current.switchOperand->data, current.currentCaseOperands[i]->data); + bdestroy(current.currentCaseOperands[i]); } - else + bcatcstr(glsl, ") {\n"); + ++psContext->indent; + current.currentCaseOperands.clear(); + } + + if (current.conditionalsInfo.size() > 0) + { + SwitchConversion::ConditionalInfo& conditional = current.conditionalsInfo.back(); + + if (conditional.breakEncountered) { - bcatcstr(glsl, "}\n"); + // We first check for BREAK ENDIF sequence. + // If we see ELSE or CASE afterwards, we don't emit our own ELSE. + if (psInst->eOpcode == OPCODE_ENDIF && !conditional.endifEncountered) + conditional.endifEncountered = true; + else + { + conditional.endifEncountered = false; + conditional.breakEncountered = false; + if (psInst->eOpcode == OPCODE_ELSE) + { + if (conditional.breakCount > 0) + --conditional.breakCount; + } + else if (psInst->eOpcode != OPCODE_CASE) + { + psContext->AddIndentation(); + bcatcstr(glsl, "else {\n"); + ++psContext->indent; + } + } } - bcatcstr(glsl, "if("); - if (psInst->eBooleanTestType != INSTRUCTION_TEST_NONZERO) - bcatcstr(glsl, "!"); - bcatcstr(glsl, psInst->m_StaticBranchName.c_str()); - if (psInst->eOpcode != OPCODE_IF) + + if (psInst->eOpcode == OPCODE_CASE || psInst->eOpcode == OPCODE_ENDSWITCH || (psInst->eOpcode == OPCODE_ENDIF && !conditional.endifEncountered)) { - bformata(glsl, "){%s;}\n", statement); + for (int i = 0; i < conditional.breakCount; ++i) + { + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + } + current.conditionalsInfo.pop_back(); } - else - { - bcatcstr(glsl, "){\n"); - } - return; } } @@ -2085,22 +2177,25 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals if (!isEmbedded) { -#ifdef _DEBUG - // Uncomment to print instruction IDs 
- //psContext->AddIndentation(); - //bformata(glsl, "//Instruction %d\n", psInst->id); -#if 0 - if (psInst->id == 73) + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) { - ASSERT(1); //Set breakpoint here to debug an instruction from its ID. + // Uncomment to print instruction IDs + //psContext->AddIndentation(); + //bformata(glsl, "//Instruction %d\n", psInst->id); + #if 0 + if (psInst->id == 73) + { + ASSERT(1); //Set breakpoint here to debug an instruction from its ID. + } + #endif } -#endif -#endif - if (psInst->m_SkipTranslation) return; } + if (!m_SwitchStack.empty()) + HandleSwitchTransformation(psInst, glsl); + switch (psInst->eOpcode) { case OPCODE_FTOI: @@ -2109,13 +2204,14 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); SHADER_VARIABLE_TYPE castType = psInst->eOpcode == OPCODE_FTOU ? SVT_UINT : SVT_INT; -#ifdef _DEBUG - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_FTOU) - bcatcstr(glsl, "//FTOU\n"); - else - bcatcstr(glsl, "//FTOI\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_FTOU) + bcatcstr(glsl, "//FTOU\n"); + else + bcatcstr(glsl, "//FTOI\n"); + } switch (psInst->asOperands[0].eMinPrecision) { case OPERAND_MIN_PRECISION_DEFAULT: @@ -2133,7 +2229,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], castType, srcCount, psInst->ui32PreciseMask, &numParenthesis); bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, castType, dstCount, false)); bcatcstr(glsl, "("); // 1 TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT, 
psInst->asOperands[0].GetAccessMask()); @@ -2144,17 +2240,48 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals case OPCODE_MOV: { -#ifdef _DEBUG - if (!isEmbedded) + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) { - psContext->AddIndentation(); - bcatcstr(glsl, "//MOV\n"); + if (!isEmbedded) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MOV\n"); + } } -#endif if (!isEmbedded) psContext->AddIndentation(); - AddMOVBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], isEmbedded); + // UNITY SPECIFIC: you can check case 1158280 + // This looks like a hack because it is! There is a bug that is quite hard to reproduce. + // When doing data analysis we assume that immediates are ints and hope it will be promoted later + // which is kinda fine unless there is an unfortunate combination happening: + // We operate on 4-component registers - we need different components to be treated as float/int + // but we should not use float operations (as this will mark register as float) + // instead "float" components should be used for MOV and friends to other registers + // and they, in turn, should be used for float ops + // In pseudocode it can look like this: + // var2.xy = var1.xy; var1.xy = var2.xy; // not marked as float explicitly + // bool foo = var1.z | <...> // marked as int + // Now we have immediate that will be treated as int but NOT promoted because we think we have all ints + // var1.w = 1 // var1 is marked int + // What is important is that this temporary is marked as int by us but DX compiler treats it + // as "normal" float (and rightfully so) [or rather - we speak about cases where it does treat it as float] + // It is also important that we speak about temps (otherwise we have explicit data type to use, so promotion works) + // + // At this point we have mov immediate to int temp (which should really be float temp) + { + Operand *pDst = &psInst->asOperands[0], *pSrc = &psInst->asOperands[1]; + if 
(pDst->GetDataType(psContext) == SVT_INT // dst marked as int + && pDst->eType == OPERAND_TYPE_TEMP // dst is temp + && pSrc->eType == OPERAND_TYPE_IMMEDIATE32 // src is immediate + && psContext->psShader->psIntTempSizes[pDst->ui32RegisterNumber] == 0 // no temp register allocated + ) + { + pDst->aeDataType[0] = pDst->aeDataType[1] = pDst->aeDataType[2] = pDst->aeDataType[3] = SVT_FLOAT; + } + } + + AddMOVBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], psInst->ui32PreciseMask, isEmbedded); break; } case OPCODE_ITOF://signed to float @@ -2164,17 +2291,14 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); -#ifdef _DEBUG - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_ITOF) + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) { - bcatcstr(glsl, "//ITOF\n"); + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_ITOF) + bcatcstr(glsl, "//ITOF\n"); + else + bcatcstr(glsl, "//UTOF\n"); } - else - { - bcatcstr(glsl, "//UTOF\n"); - } -#endif switch (psInst->asOperands[0].eMinPrecision) { @@ -2191,7 +2315,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], castType, srcCount, psInst->ui32PreciseMask, &numParenthesis); bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, castType, dstCount, false)); bcatcstr(glsl, "("); // 1 TranslateOperand(&psInst->asOperands[1], psInst->eOpcode == OPCODE_UTOF ? 
TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT, psInst->asOperands[0].GetAccessMask()); @@ -2201,21 +2325,22 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_MAD: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MAD\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MAD\n"); + } CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, TO_FLAG_NONE); break; } case OPCODE_IMAD: { uint32_t ui32Flags = TO_FLAG_INTEGER; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMAD\n"); -#endif - + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMAD\n"); + } if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) { ui32Flags = TO_FLAG_UNSIGNED_INTEGER; @@ -2226,23 +2351,25 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_DADD: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DADD\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DADD\n"); + } CallBinaryOp("+", psInst, 0, 1, 2, SVT_DOUBLE); break; } case OPCODE_IADD: { SHADER_VARIABLE_TYPE eType = SVT_INT; -#ifdef _DEBUG - if (!isEmbedded) + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) { - psContext->AddIndentation(); - bcatcstr(glsl, "//IADD\n"); + if (!isEmbedded) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IADD\n"); + } } -#endif //Is this a signed or unsigned add? 
if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) { @@ -2253,20 +2380,22 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_ADD: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ADD\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ADD\n"); + } CallBinaryOp("+", psInst, 0, 1, 2, SVT_FLOAT); break; } case OPCODE_OR: { /*Todo: vector version */ -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//OR\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//OR\n"); + } uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); uint32_t destMask = psInst->asOperands[0].GetAccessMask(); if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) @@ -2277,7 +2406,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals int needsParenthesis = 0; psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), &needsParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, &needsParenthesis); TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); bcatcstr(glsl, " || "); TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); @@ -2285,25 +2414,50 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } else { - // Do component-wise and, glsl doesn't support || on bvecs - for (uint32_t k = 0; k < 4; k++) + Operand* pDest = &psInst->asOperands[0]; + const SHADER_VARIABLE_TYPE eDestType = pDest->GetDataType(psContext); + const std::string tempName = "hlslcc_orTemp"; + + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + ++psContext->indent; + psContext->AddIndentation(); + + int 
numComponents = (pDest->eType == OPERAND_TYPE_TEMP) ? + psContext->psShader->GetTempComponentCount(eDestType, pDest->ui32RegisterNumber) : + pDest->iNumComponents; + const char* constructorStr = HLSLcc::GetConstructorForType(psContext, eDestType, numComponents, false); + bformata(glsl, "%s %s = ", constructorStr, tempName.c_str()); + TranslateOperand(pDest, TO_FLAG_NAME_ONLY); + bformata(glsl, ";\n"); + + const_cast(pDest)->specialName.assign(tempName); + + int srcElem = -1; + for (uint32_t destElem = 0; destElem < 4; ++destElem) { - if ((destMask & (1 << k)) == 0) + int numParenthesis = 0; + srcElem++; + if (pDest->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && pDest->ui32CompMask != 0 && !(pDest->ui32CompMask & (1 << destElem))) continue; - int needsParenthesis = 0; psContext->AddIndentation(); - // Override dest mask temporarily - psInst->asOperands[0].ui32CompMask = (1 << k); - ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); - AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, 1, &needsParenthesis); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, 1 << k); + AddOpAssignToDestWithMask(pDest, eDestType, 1, psInst->ui32PreciseMask, &numParenthesis, 1 << destElem); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, 1 << srcElem); bcatcstr(glsl, " || "); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, 1 << k); - AddAssignPrologue(needsParenthesis); + TranslateOperand(&psInst->asOperands[2], SVTTypeToFlag(eDestType), 1 << srcElem); + AddAssignPrologue(numParenthesis); } - // Restore old mask - psInst->asOperands[0].ui32CompMask = destMask; + + const_cast(pDest)->specialName.clear(); + + psContext->AddIndentation(); + TranslateOperand(glsl, pDest, TO_FLAG_NAME_ONLY); + bformata(glsl, " = %s;\n", tempName.c_str()); + + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); } } else @@ -2314,12 +2468,13 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals { 
SHADER_VARIABLE_TYPE eA = psInst->asOperands[1].GetDataType(psContext); SHADER_VARIABLE_TYPE eB = psInst->asOperands[2].GetDataType(psContext); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//AND\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//AND\n"); + } uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); SHADER_VARIABLE_TYPE eDataType = psInst->asOperands[0].GetDataType(psContext); uint32_t ui32Flags = SVTTypeToFlag(eDataType); if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) @@ -2328,7 +2483,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals { int needsParenthesis = 0; psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), &needsParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, &needsParenthesis); TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); bcatcstr(glsl, " && "); TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); @@ -2347,7 +2502,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals // Override dest mask temporarily psInst->asOperands[0].ui32CompMask = (1 << k); ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); - AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, 1, &needsParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, 1, psInst->ui32PreciseMask, &needsParenthesis); TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, 1 << k); bcatcstr(glsl, " && "); TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, 1 << k); @@ -2367,7 +2522,7 @@ void ToGLSL::TranslateInstruction(Instruction* 
psInst, bool isEmbedded /* = fals if (dstSwizCount == 1) { - AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, &needsParenthesis); + AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, psInst->ui32PreciseMask, &needsParenthesis); TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); bcatcstr(glsl, " ? "); TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); @@ -2375,68 +2530,105 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); bcatcstr(glsl, "("); - for (i = 0; i < dstSwizCount; i++) + switch (eDataType) { - if (i > 0) - bcatcstr(glsl, ", "); - switch (eDataType) - { - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - case SVT_DOUBLE: - bcatcstr(glsl, "0.0"); - break; - default: - bcatcstr(glsl, "0"); - } + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + case SVT_DOUBLE: + bcatcstr(glsl, "0.0"); + break; + default: + bcatcstr(glsl, "0"); } bcatcstr(glsl, ")"); } else if (eDataType == SVT_FLOAT) { - // We can use mix() - AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, &needsParenthesis); - bcatcstr(glsl, "mix("); - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); - bcatcstr(glsl, "("); - for (i = 0; i < dstSwizCount; i++) + // We cannot use mix(), because it propagates NaN from both endpoints, which + // is not correct if the AND was used to implement a branch that guards against NaN. + // Instead, do either a single ?: select if the bool is a scalar, or component-wise + // ?: selects if the bool is a vector. + if (psInst->asOperands[boolOp].IsSwizzleReplicated()) { - if (i > 0) - bcatcstr(glsl, ", "); - switch (eDataType) + // Bool is effectively a scalar, we can just do a single ?: + + // The swizzle is either xxxx, yyyy, zzzz, or wwww. In each case, + // the max component will give us the 1-based index. 
+ int boolChannel = psInst->asOperands[boolOp].GetMaxComponent(); + + AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, psInst->ui32PreciseMask, &needsParenthesis); + TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, 1 << (boolChannel - 1)); + bcatcstr(glsl, " ? "); + TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); + bcatcstr(glsl, " : "); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); + bcatcstr(glsl, "("); + for (i = 0; i < dstSwizCount; i++) { - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - case SVT_DOUBLE: - bcatcstr(glsl, "0.0"); - break; - default: - bcatcstr(glsl, "0"); + if (i > 0) + bcatcstr(glsl, ", "); + bcatcstr(glsl, "0.0"); } + bcatcstr(glsl, ")"); + } + else + { + bool needsIndent = false; + + // Do component-wise select + for (uint32_t k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) == 0) + continue; + + int needsParenthesis = 0; + if (needsIndent) + psContext->AddIndentation(); + + // Override dest mask temporarily + psInst->asOperands[0].ui32CompMask = (1 << k); + ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); + AddAssignToDest(&psInst->asOperands[0], eDataType, 1, psInst->ui32PreciseMask, &needsParenthesis); + TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, 1 << k); + bcatcstr(glsl, " ? 
"); + TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, 1 << k); + bcatcstr(glsl, " : 0.0"); + AddAssignPrologue(needsParenthesis); + + needsIndent = true; + } + + // Restore old mask + psInst->asOperands[0].ui32CompMask = destMask; } - bcatcstr(glsl, "), "); - TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); - bcatcstr(glsl, ", "); - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); - bcatcstr(glsl, "("); - TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, ")"); - bcatcstr(glsl, ")"); } else { - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, dstSwizCount, &needsParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, dstSwizCount, psInst->ui32PreciseMask, &needsParenthesis); + const bool haveNativeBitwiseOps = HaveNativeBitwiseOps(psContext->psShader->eTargetLanguage); + if (!haveNativeBitwiseOps) + { + UseExtraFunctionDependency("op_and"); + bcatcstr(glsl, "op_and"); + } bcatcstr(glsl, "("); bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_UINT, dstSwizCount, false)); bcatcstr(glsl, "("); TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, ") * 0xFFFFFFFFu) & "); + bcatcstr(glsl, ") * 0xFFFFFFFFu"); else - bcatcstr(glsl, ") * -1) & "); // GLSL ES 2 spec: high precision ints are guaranteed to have a range of at least (-2^16, 2^16) + bcatcstr(glsl, ") * -1"); // GLSL ES 2 spec: high precision ints are guaranteed to have a range of at least (-2^16, 2^16) + + if (haveNativeBitwiseOps) + bcatcstr(glsl, ") & "); + else + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[otherOp], TO_FLAG_UNSIGNED_INTEGER, destMask); + if (!haveNativeBitwiseOps) + bcatcstr(glsl, ")"); } AddAssignPrologue(needsParenthesis); @@ -2454,29 +2646,32 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals dest = 
vec4(greaterThanEqual(vec4(srcA), vec4(srcB)); Caveat: The result is a boolean but HLSL asm returns 0xFFFFFFFF/0x0 instead. */ -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GE\n"); + } AddComparison(psInst, CMP_GE, TO_FLAG_NONE); break; } case OPCODE_MUL: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MUL\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MUL\n"); + } CallBinaryOp("*", psInst, 0, 1, 2, SVT_FLOAT); break; } case OPCODE_IMUL: { SHADER_VARIABLE_TYPE eType = SVT_INT; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMUL\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMUL\n"); + } if (psInst->asOperands[1].GetDataType(psContext) == SVT_UINT) { eType = SVT_UINT; @@ -2489,10 +2684,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_UDIV: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//UDIV\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//UDIV\n"); + } //destQuotient, destRemainder, src0, src1 // There are cases where destQuotient is the same variable as src0 or src1. 
If that happens, @@ -2512,19 +2708,21 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_DIV: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DIV\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DIV\n"); + } CallBinaryOp("/", psInst, 0, 1, 2, SVT_FLOAT); break; } case OPCODE_SINCOS: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SINCOS\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SINCOS\n"); + } // Need careful ordering if src == dest[0], as then the cos() will be reading from wrong value if (psInst->asOperands[0].eType == psInst->asOperands[2].eType && psInst->asOperands[0].ui32RegisterNumber == psInst->asOperands[2].ui32RegisterNumber) @@ -2560,12 +2758,13 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals case OPCODE_DP2: { int numParenthesis = 0; -#ifdef _DEBUG + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DP2\n"); + } psContext->AddIndentation(); - bcatcstr(glsl, "//DP2\n"); -#endif - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, psInst->ui32PreciseMask, &numParenthesis); bcatcstr(glsl, "dot("); TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); bcatcstr(glsl, ", "); @@ -2577,12 +2776,13 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals case OPCODE_DP3: { int numParenthesis = 0; -#ifdef _DEBUG + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DP3\n"); + } psContext->AddIndentation(); - bcatcstr(glsl, "//DP3\n"); -#endif - 
psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, psInst->ui32PreciseMask, &numParenthesis); bcatcstr(glsl, "dot("); TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); bcatcstr(glsl, ", "); @@ -2593,166 +2793,184 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_DP4: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DP4\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DP4\n"); + } CallHelper2("dot", psInst, 0, 1, 2, 0); break; } case OPCODE_INE: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//INE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//INE\n"); + } AddComparison(psInst, CMP_NE, TO_FLAG_INTEGER); break; } case OPCODE_NE: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//NE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//NE\n"); + } AddComparison(psInst, CMP_NE, TO_FLAG_NONE); break; } case OPCODE_IGE: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IGE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IGE\n"); + } AddComparison(psInst, CMP_GE, TO_FLAG_INTEGER); break; } case OPCODE_ILT: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ILT\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ILT\n"); + } AddComparison(psInst, CMP_LT, TO_FLAG_INTEGER); break; } case OPCODE_LT: { -#ifdef _DEBUG - psContext->AddIndentation(); - 
bcatcstr(glsl, "//LT\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LT\n"); + } AddComparison(psInst, CMP_LT, TO_FLAG_NONE); break; } case OPCODE_IEQ: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IEQ\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IEQ\n"); + } AddComparison(psInst, CMP_EQ, TO_FLAG_INTEGER); break; } case OPCODE_ULT: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ULT\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ULT\n"); + } AddComparison(psInst, CMP_LT, TO_FLAG_UNSIGNED_INTEGER); break; } case OPCODE_UGE: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//UGE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//UGE\n"); + } AddComparison(psInst, CMP_GE, TO_FLAG_UNSIGNED_INTEGER); break; } case OPCODE_MOVC: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MOVC\n"); -#endif - AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3]); + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MOVC\n"); + } + AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], psInst->ui32PreciseMask); break; } case OPCODE_SWAPC: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SWAPC\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SWAPC\n"); + } // TODO needs temps!! 
- AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[2], &psInst->asOperands[4], &psInst->asOperands[3]); - AddMOVCBinaryOp(&psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], &psInst->asOperands[4]); + AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[2], &psInst->asOperands[4], &psInst->asOperands[3], psInst->ui32PreciseMask); + AddMOVCBinaryOp(&psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], &psInst->asOperands[4], psInst->ui32PreciseMask); break; } case OPCODE_LOG: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LOG\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LOG\n"); + } CallHelper1("log2", psInst, 0, 1, 1); break; } case OPCODE_RSQ: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RSQ\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RSQ\n"); + } CallHelper1("inversesqrt", psInst, 0, 1, 1); break; } case OPCODE_EXP: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EXP\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EXP\n"); + } CallHelper1("exp2", psInst, 0, 1, 1); break; } case OPCODE_SQRT: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SQRT\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SQRT\n"); + } CallHelper1("sqrt", psInst, 0, 1, 1); break; } case OPCODE_ROUND_PI: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_PI\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_PI\n"); + } CallHelper1("ceil", psInst, 0, 1, 1); break; } case OPCODE_ROUND_NI: { 
-#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_NI\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_NI\n"); + } CallHelper1("floor", psInst, 0, 1, 1); break; } case OPCODE_ROUND_Z: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_Z\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_Z\n"); + } if (psContext->psShader->eTargetLanguage == LANG_ES_100) UseExtraFunctionDependency("trunc"); @@ -2761,11 +2979,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_ROUND_NE: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_NE\n"); -#endif - + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_NE\n"); + } if (psContext->psShader->eTargetLanguage == LANG_ES_100) UseExtraFunctionDependency("roundEven"); @@ -2774,19 +2992,21 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_FRC: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//FRC\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//FRC\n"); + } CallHelper1("fract", psInst, 0, 1, 1); break; } case OPCODE_IMAX: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMAX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMAX\n"); + } if (psContext->psShader->eTargetLanguage == LANG_ES_100) CallHelper2("max", psInst, 0, 1, 2, 1); else @@ -2795,10 +3015,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_UMAX: { -#ifdef _DEBUG - psContext->AddIndentation(); - 
bcatcstr(glsl, "//UMAX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//UMAX\n"); + } if (psContext->psShader->eTargetLanguage == LANG_ES_100) CallHelper2("max", psInst, 0, 1, 2, 1); else @@ -2807,19 +3028,21 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_MAX: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MAX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MAX\n"); + } CallHelper2("max", psInst, 0, 1, 2, 1); break; } case OPCODE_IMIN: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMIN\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMIN\n"); + } if (psContext->psShader->eTargetLanguage == LANG_ES_100) CallHelper2("min", psInst, 0, 1, 2, 1); else @@ -2828,10 +3051,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_UMIN: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//UMIN\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//UMIN\n"); + } if (psContext->psShader->eTargetLanguage == LANG_ES_100) CallHelper2("min", psInst, 0, 1, 2, 1); else @@ -2840,124 +3064,136 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_MIN: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MIN\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MIN\n"); + } CallHelper2("min", psInst, 0, 1, 2, 1); break; } case OPCODE_GATHER4: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4\n"); -#endif + if (psContext->flags & 
HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER); break; } case OPCODE_GATHER4_PO_C: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4_PO_C\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_PO_C\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET | TEXSMP_FLAG_DEPTHCOMPARE); break; } case OPCODE_GATHER4_PO: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4_PO\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_PO\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET); break; } case OPCODE_GATHER4_C: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4_C\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_C\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_DEPTHCOMPARE); break; } case OPCODE_SAMPLE: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_NONE); break; } case OPCODE_SAMPLE_L: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_L\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_L\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_LOD); break; } case OPCODE_SAMPLE_C: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_C\n"); -#endif - + if (psContext->flags 
& HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_C\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE); break; } case OPCODE_SAMPLE_C_LZ: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_C_LZ\n"); -#endif - + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_C_LZ\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE | TEXSMP_FLAG_FIRSTLOD); break; } case OPCODE_SAMPLE_D: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_D\n"); -#endif - + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_D\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_GRAD); break; } case OPCODE_SAMPLE_B: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_B\n"); -#endif - + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_B\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_BIAS); break; } case OPCODE_RET: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RET\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RET\n"); + } if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Post shader code ---\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); + } + bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End post shader code ---\n"); -#endif + + if (psContext->flags & 
HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); + } } psContext->AddIndentation(); bcatcstr(glsl, "return;\n"); @@ -2973,10 +3209,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals uint32_t funcBodyIndex; uint32_t ui32NumBodiesPerTable; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//INTERFACE_CALL\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//INTERFACE_CALL\n"); + } ASSERT(psInst->asOperands[0].eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32); @@ -3000,10 +3237,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_LABEL: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LABEL\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LABEL\n"); + } --psContext->indent; psContext->AddIndentation(); bcatcstr(glsl, "}\n"); //Closing brace ends the previous function. 
@@ -3018,23 +3256,40 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_COUNTBITS: { -#ifdef _DEBUG + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//COUNTBITS\n"); + } psContext->AddIndentation(); - bcatcstr(glsl, "//COUNTBITS\n"); -#endif - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + + // in glsl bitCount decl is genIType bitCount(genIType), so it is important that input/output types agree + // enter assembly: when writing swizzle encoding we use 0 to say "source from x" + // now, say, we generate code o.xy = bitcount(i.xy) + // output gets component mask 1,1,0,0 (note that we use bit 1<).<..> will still collapse everything into + // bitCount(i.<..>) [well, tweaking swizzle, sure] + // what does that mean is that we can safely take output component count to determine "proper" type + // note that hlsl compiler already checked that things can work out, so it should be fine doing this magic + const Operand* dst = &psInst->asOperands[0]; + const int dstCompCount = dst->eSelMode == OPERAND_4_COMPONENT_MASK_MODE ? 
dst->ui32CompMask : OPERAND_4_COMPONENT_MASK_ALL; + + TranslateOperand(dst, TO_FLAG_INTEGER | TO_FLAG_DESTINATION); bcatcstr(glsl, " = bitCount("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, dstCompCount); bcatcstr(glsl, ");\n"); break; } case OPCODE_FIRSTBIT_HI: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_HI\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_HI\n"); + } psContext->AddIndentation(); TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); bcatcstr(glsl, " = findMSB("); @@ -3044,10 +3299,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_FIRSTBIT_LO: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_LO\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_LO\n"); + } psContext->AddIndentation(); TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); bcatcstr(glsl, " = findLSB("); @@ -3057,10 +3313,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_FIRSTBIT_SHI: //signed high { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_SHI\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_SHI\n"); + } psContext->AddIndentation(); TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); bcatcstr(glsl, " = findMSB("); @@ -3070,10 +3327,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_BFREV: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BFREV\n"); -#endif + if 
(psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BFREV\n"); + } psContext->AddIndentation(); TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); bcatcstr(glsl, " = bitfieldReverse("); @@ -3089,15 +3347,16 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals uint32_t numelements_dest = psInst->asOperands[0].GetNumSwizzleElements(); uint32_t numoverall_elements = std::min(std::min(numelements_width, numelements_offset), numelements_dest); uint32_t i, j, k; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BFI\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BFI\n"); + } if (psContext->psShader->eTargetLanguage == LANG_ES_300) UseExtraFunctionDependency("int_bitfieldInsert"); psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, numoverall_elements, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, numoverall_elements, psInst->ui32PreciseMask, &numParenthesis); if (numoverall_elements == 1) bformata(glsl, "int("); @@ -3133,31 +3392,37 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_CUT: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//CUT\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//CUT\n"); + } psContext->AddIndentation(); bcatcstr(glsl, "EndPrimitive();\n"); break; } case OPCODE_EMIT: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EMIT\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EMIT\n"); + } if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) { -#ifdef _DEBUG - psContext->AddIndentation(); - 
bcatcstr(glsl, "//--- Post shader code ---\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); + } + bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End post shader code ---\n"); -#endif + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); + } } psContext->AddIndentation(); @@ -3166,10 +3431,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_EMITTHENCUT: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EMITTHENCUT\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EMITTHENCUT\n"); + } psContext->AddIndentation(); bcatcstr(glsl, "EmitVertex();\n"); psContext->AddIndentation(); @@ -3179,10 +3445,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals case OPCODE_CUT_STREAM: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//CUT_STREAM\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//CUT_STREAM\n"); + } psContext->AddIndentation(); ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_STREAM); if (psContext->psShader->eTargetLanguage < LANG_400 || psInst->asOperands[0].ui32RegisterNumber == 0) @@ -3201,21 +3468,26 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_EMIT_STREAM: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EMIT_STREAM\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EMIT_STREAM\n"); + } if 
(psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Post shader code ---\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); + } + bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End post shader code ---\n"); -#endif + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); + } } psContext->AddIndentation(); @@ -3236,10 +3508,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_EMITTHENCUT_STREAM: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EMITTHENCUT\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EMITTHENCUT\n"); + } ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_STREAM); if (psContext->psShader->eTargetLanguage < LANG_400 || psInst->asOperands[0].ui32RegisterNumber == 0) { @@ -3262,10 +3535,13 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_REP: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//REP\n"); -#endif + if (!m_SwitchStack.empty()) + ++m_SwitchStack.back().isInLoop; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//REP\n"); + } //Need to handle nesting. 
//Max of 4 for rep - 'Flow Control Limitations' http://msdn.microsoft.com/en-us/library/windows/desktop/bb219848(v=vs.85).aspx @@ -3281,10 +3557,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_ENDREP: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ENDREP\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDREP\n"); + } psContext->AddIndentation(); bcatcstr(glsl, "RepCounter--;\n"); @@ -3292,14 +3569,19 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals psContext->AddIndentation(); bcatcstr(glsl, "}\n"); + if (!m_SwitchStack.empty()) + --m_SwitchStack.back().isInLoop; break; } case OPCODE_LOOP: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LOOP\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LOOP\n"); + } + if (!m_SwitchStack.empty()) + ++m_SwitchStack.back().isInLoop; psContext->AddIndentation(); if (psInst->ui32NumOperands == 2) @@ -3346,7 +3628,8 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals TranslateInstruction(psInst->m_LoopInductors[0], true); } bcatcstr(glsl, " ; "); - bool negateCondition = psInst->m_LoopInductors[1]->eBooleanTestType != INSTRUCTION_TEST_NONZERO; + bool negateCondition = psInst->m_LoopInductors[1]->eBooleanTestType + != psInst->m_LoopInductors[2]->eBooleanTestType; bool negateOrder = false; // Yet Another NVidia OSX shader compiler bug workaround (really nvidia, get your s#!t together): @@ -3448,41 +3731,72 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals case OPCODE_ENDLOOP: { --psContext->indent; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ENDLOOP\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + 
psContext->AddIndentation(); + bcatcstr(glsl, "//ENDLOOP\n"); + } psContext->AddIndentation(); bcatcstr(glsl, "}\n"); + if (!m_SwitchStack.empty()) + --m_SwitchStack.back().isInLoop; break; } case OPCODE_BREAK: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BREAK\n"); -#endif - psContext->AddIndentation(); - bcatcstr(glsl, "break;\n"); + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BREAK\n"); + } + if (m_SwitchStack.empty() || m_SwitchStack.back().isInLoop != 0) + { + psContext->AddIndentation(); + bcatcstr(glsl, "break;\n"); + } + else + { + std::vector& conditionalsInfo = m_SwitchStack.back().conditionalsInfo; + if (conditionalsInfo.size() > 0) + { + conditionalsInfo.back().breakEncountered = true; + ++conditionalsInfo.back().breakCount; + } + } break; } case OPCODE_BREAKC: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BREAKC\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BREAKC\n"); + } psContext->AddIndentation(); - TranslateConditional(psInst, glsl); + if (m_SwitchStack.empty() || m_SwitchStack.back().isInLoop != 0) + { + TranslateConditional(psInst, glsl); + } + else + { + // This way we won't emit a "break" when we're transforming a "switch" into if/else for ES2 + OPCODE_TYPE opcode = psInst->eOpcode; + psInst->eOpcode = OPCODE_IF; + TranslateConditional(psInst, glsl); + psInst->eOpcode = opcode; + std::vector& conditionalsInfo = m_SwitchStack.back().conditionalsInfo; + conditionalsInfo.push_back(SwitchConversion::ConditionalInfo(1, true, true)); + } break; } case OPCODE_CONTINUEC: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//CONTINUEC\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//CONTINUEC\n"); + } psContext->AddIndentation(); 
TranslateConditional(psInst, glsl); @@ -3490,22 +3804,31 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_IF: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IF\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IF\n"); + } psContext->AddIndentation(); TranslateConditional(psInst, glsl); ++psContext->indent; + + if (!m_SwitchStack.empty() && m_SwitchStack.back().isInLoop == 0) + { + std::vector& conditionalsInfo = m_SwitchStack.back().conditionalsInfo; + conditionalsInfo.push_back(SwitchConversion::ConditionalInfo(0)); + } + break; } case OPCODE_RETC: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RETC\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RETC\n"); + } psContext->AddIndentation(); TranslateConditional(psInst, glsl); @@ -3514,23 +3837,55 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals case OPCODE_ELSE: { --psContext->indent; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ELSE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ELSE\n"); + } psContext->AddIndentation(); bcatcstr(glsl, "} else {\n"); psContext->indent++; + + if (!m_SwitchStack.empty() && m_SwitchStack.back().isInLoop == 0) + { + std::vector& conditionalsInfo = m_SwitchStack.back().conditionalsInfo; + conditionalsInfo.push_back(SwitchConversion::ConditionalInfo(0)); + } break; } case OPCODE_ENDSWITCH: + { + const bool endsSwitch = m_SwitchStack.empty(); + --psContext->indent; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDSWITCH\n"); + } + if (endsSwitch) + --psContext->indent; + psContext->AddIndentation(); + 
bcatcstr(glsl, "}\n"); + if (!endsSwitch) + m_SwitchStack.pop_back(); + break; + } case OPCODE_ENDIF: { --psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "//ENDIF\n"); + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDIF\n"); + } psContext->AddIndentation(); bcatcstr(glsl, "}\n"); + + if (!m_SwitchStack.empty() && m_SwitchStack.back().isInLoop == 0) + { + std::vector& conditionalsInfo = m_SwitchStack.back().conditionalsInfo; + conditionalsInfo.pop_back(); + } break; } case OPCODE_CONTINUE: @@ -3543,7 +3898,10 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals { --psContext->indent; psContext->AddIndentation(); - bcatcstr(glsl, "default:\n"); + if (m_SwitchStack.empty()) + bcatcstr(glsl, "default:\n"); + else + bcatcstr(glsl, "} else {\n"); ++psContext->indent; break; } @@ -3555,10 +3913,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals { const uint32_t ui32SyncFlags = psInst->ui32SyncFlags; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SYNC\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SYNC\n"); + } if (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY) { @@ -3579,49 +3938,73 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_SWITCH: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SWITCH\n"); -#endif - psContext->AddIndentation(); - bcatcstr(glsl, "switch(int("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - bcatcstr(glsl, ")){\n"); + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SWITCH\n"); + } + if (psContext->psShader->eTargetLanguage != LANG_ES_100) + { + psContext->AddIndentation(); + bcatcstr(glsl, "switch("); + 
TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, "){\n"); - psContext->indent += 2; + psContext->indent += 2; + } + else + { + // GLSL ES2 doesn't support switch, need to convert to if/else if/else + SwitchConversion conversion; + TranslateOperand(conversion.switchOperand, &psInst->asOperands[0], TO_FLAG_INTEGER); + m_SwitchStack.push_back(conversion); + ++psContext->indent; + } break; } case OPCODE_CASE: { - --psContext->indent; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//case\n"); -#endif - psContext->AddIndentation(); + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//case\n"); + } + if (m_SwitchStack.empty()) + { + --psContext->indent; + psContext->AddIndentation(); - bcatcstr(glsl, "case "); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - bcatcstr(glsl, ":\n"); + bcatcstr(glsl, "case "); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ":\n"); - ++psContext->indent; + ++psContext->indent; + } + else + { + bstring operand = bfromcstr(""); + TranslateOperand(operand, &psInst->asOperands[0], TO_FLAG_INTEGER); + m_SwitchStack.back().currentCaseOperands.push_back(operand); + } break; } case OPCODE_EQ: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EQ\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EQ\n"); + } AddComparison(psInst, CMP_EQ, TO_FLAG_NONE); break; } case OPCODE_USHR: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//USHR\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//USHR\n"); + } CallBinaryOp(">>", psInst, 0, 1, 2, SVT_UINT); break; } @@ -3629,10 +4012,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals { SHADER_VARIABLE_TYPE eType = SVT_INT; 
-#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ISHL\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ISHL\n"); + } if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) { @@ -3645,10 +4029,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals case OPCODE_ISHR: { SHADER_VARIABLE_TYPE eType = SVT_INT; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ISHR\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ISHR\n"); + } if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) { @@ -3662,13 +4047,14 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals case OPCODE_LD_MS: { const ResourceBinding* psBinding = 0; -#ifdef _DEBUG - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_LD) - bcatcstr(glsl, "//LD\n"); - else - bcatcstr(glsl, "//LD_MS\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_LD) + bcatcstr(glsl, "//LD\n"); + else + bcatcstr(glsl, "//LD_MS\n"); + } psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, &psBinding); @@ -3677,10 +4063,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_DISCARD: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DISCARD\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DISCARD\n"); + } psContext->AddIndentation(); if (psContext->psShader->ui32MajorVersion <= 3) { @@ -3699,29 +4086,43 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } else if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) { - 
bcatcstr(glsl, "if(("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - bcatcstr(glsl, ")==0){discard;}\n"); + const bool isBool = psInst->asOperands[0].GetDataType(psContext, SVT_INT) == SVT_BOOL; + const bool forceNoBoolUpscale = psContext->psShader->eTargetLanguage >= LANG_ES_FIRST && psContext->psShader->eTargetLanguage <= LANG_ES_LAST; + const bool useDirectTest = isBool && forceNoBoolUpscale; + bcatcstr(glsl, "if("); + bcatcstr(glsl, useDirectTest ? "!" : "("); + TranslateOperand(&psInst->asOperands[0], useDirectTest ? TO_FLAG_BOOL : TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_ALL, forceNoBoolUpscale); + if (!useDirectTest) + bcatcstr(glsl, ")==0"); + bcatcstr(glsl, "){discard;}\n"); } else { ASSERT(psInst->eBooleanTestType == INSTRUCTION_TEST_NONZERO); - bcatcstr(glsl, "if(("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - bcatcstr(glsl, ")!=0){discard;}\n"); + const bool isBool = psInst->asOperands[0].GetDataType(psContext, SVT_INT) == SVT_BOOL; + const bool forceNoBoolUpscale = psContext->psShader->eTargetLanguage >= LANG_ES_FIRST && psContext->psShader->eTargetLanguage <= LANG_ES_LAST; + const bool useDirectTest = isBool && forceNoBoolUpscale; + bcatcstr(glsl, "if("); + if (!useDirectTest) + bcatcstr(glsl, "("); + TranslateOperand(&psInst->asOperands[0], useDirectTest ? 
TO_FLAG_BOOL : TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_ALL, forceNoBoolUpscale); + if (!useDirectTest) + bcatcstr(glsl, ")!=0"); + bcatcstr(glsl, "){discard;}\n"); } break; } case OPCODE_LOD: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LOD\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LOD\n"); + } //LOD computes the following vector (ClampedLOD, NonClampedLOD, 0, 0) psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 4, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 4, psInst->ui32PreciseMask, &numParenthesis); //If the core language does not have query-lod feature, //then the extension is used. The name of the function @@ -3753,10 +4154,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_EVAL_CENTROID: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_CENTROID\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_CENTROID\n"); + } psContext->AddIndentation(); TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); bcatcstr(glsl, " = interpolateAtCentroid("); @@ -3769,10 +4171,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_EVAL_SAMPLE_INDEX: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_SAMPLE_INDEX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_SAMPLE_INDEX\n"); + } psContext->AddIndentation(); TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); bcatcstr(glsl, " = interpolateAtSample("); @@ -3787,10 +4190,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_EVAL_SNAPPED: { -#ifdef _DEBUG - 
psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_SNAPPED\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_SNAPPED\n"); + } psContext->AddIndentation(); TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); bcatcstr(glsl, " = interpolateAtOffset("); @@ -3805,19 +4209,21 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_LD_STRUCTURED: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_STRUCTURED\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_STRUCTURED\n"); + } TranslateShaderStorageLoad(psInst); break; } case OPCODE_LD_UAV_TYPED: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_UAV_TYPED\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_UAV_TYPED\n"); + } Operand* psDest = &psInst->asOperands[0]; Operand* psSrc = &psInst->asOperands[2]; Operand* psSrcAddr = &psInst->asOperands[1]; @@ -3873,31 +4279,33 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } psContext->AddIndentation(); - AddAssignToDest(psDest, srcDataType, srcCount, &numParenthesis); + AddAssignToDest(psDest, srcDataType, srcCount, psInst->ui32PreciseMask, &numParenthesis); bcatcstr(glsl, "imageLoad("); TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); bcatcstr(glsl, ", "); TranslateOperand(psSrcAddr, TO_FLAG_INTEGER, compMask); bcatcstr(glsl, ")"); - TranslateOperandSwizzle(psContext, &psInst->asOperands[0], 0); + TranslateOperandSwizzleWithMask(psContext, psSrc, psDest->ui32CompMask, 0); AddAssignPrologue(numParenthesis); break; } case OPCODE_STORE_RAW: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_RAW\n"); -#endif + if (psContext->flags & 
HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_RAW\n"); + } TranslateShaderStorageStore(psInst); break; } case OPCODE_STORE_STRUCTURED: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_STRUCTURED\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_STRUCTURED\n"); + } TranslateShaderStorageStore(psInst); break; } @@ -3908,10 +4316,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals int foundResource; uint32_t flags = TO_FLAG_INTEGER; uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_UAV_TYPED\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_UAV_TYPED\n"); + } psContext->AddIndentation(); foundResource = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, @@ -3960,10 +4369,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_LD_RAW: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_RAW\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_RAW\n"); + } TranslateShaderStorageLoad(psInst); break; @@ -4000,13 +4410,14 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); SHADER_VARIABLE_TYPE dataType = psInst->eOpcode == OPCODE_UBFE ? SVT_UINT : SVT_INT; uint32_t flags = psInst->eOpcode == OPCODE_UBFE ? 
TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT; -#ifdef _DEBUG - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_UBFE) - bcatcstr(glsl, "//OPCODE_UBFE\n"); - else - bcatcstr(glsl, "//OPCODE_IBFE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_UBFE) + bcatcstr(glsl, "//OPCODE_UBFE\n"); + else + bcatcstr(glsl, "//OPCODE_IBFE\n"); + } // Need to open this up, GLSL bitfieldextract uses same offset and width for all components for (i = 0; i < 4; i++) { @@ -4015,7 +4426,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals psContext->AddIndentation(); psInst->asOperands[0].ui32CompMask = (1 << i); psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - AddAssignToDest(&psInst->asOperands[0], dataType, 1, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], dataType, 1, psInst->ui32PreciseMask, &numParenthesis); bcatcstr(glsl, "bitfieldExtract("); TranslateOperand(&psInst->asOperands[3], flags, (1 << i)); @@ -4033,12 +4444,13 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); const uint32_t srcElemCount = psInst->asOperands[1].GetNumSwizzleElements(); int numParenthesis = 0; -#ifdef _DEBUG + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RCP\n"); + } psContext->AddIndentation(); - bcatcstr(glsl, "//RCP\n"); -#endif - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, srcElemCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, srcElemCount, psInst->ui32PreciseMask, &numParenthesis); bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_FLOAT, destElemCount, false)); bcatcstr(glsl, "(1.0) / "); bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_FLOAT, destElemCount, 
false)); @@ -4052,10 +4464,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals { uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//F32TOF16\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//F32TOF16\n"); + } for (int i = 0; i < 4; i++) { @@ -4064,7 +4477,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals psContext->AddIndentation(); psInst->asOperands[0].ui32CompMask = (1 << i); psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, psInst->ui32PreciseMask, &numParenthesis); bcatcstr(glsl, "packHalf2x16(vec2("); TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, (1 << i)); @@ -4077,10 +4490,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals { uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//F16TOF32\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//F16TOF32\n"); + } for (int i = 0; i < 4; i++) { @@ -4089,7 +4503,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals psContext->AddIndentation(); psInst->asOperands[0].ui32CompMask = (1 << i); psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, psInst->ui32PreciseMask, &numParenthesis); bcatcstr(glsl, "unpackHalf2x16("); TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_UINT, (1 << i)); @@ -4101,14 +4515,15 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool 
isEmbedded /* = fals case OPCODE_INEG: { int numParenthesis = 0; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//INEG\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//INEG\n"); + } //dest = 0 - src0 psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), psInst->ui32PreciseMask, &numParenthesis); bcatcstr(glsl, "0 - "); TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); @@ -4119,10 +4534,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals case OPCODE_DERIV_RTX_FINE: case OPCODE_DERIV_RTX: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DERIV_RTX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DERIV_RTX\n"); + } CallHelper1("dFdx", psInst, 0, 1, 1); break; } @@ -4130,28 +4546,31 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals case OPCODE_DERIV_RTY_FINE: case OPCODE_DERIV_RTY: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DERIV_RTY\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DERIV_RTY\n"); + } CallHelper1("dFdy", psInst, 0, 1, 1); break; } case OPCODE_LRP: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LRP\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LRP\n"); + } CallHelper3("mix", psInst, 0, 2, 3, 1, 1); break; } case OPCODE_DP2ADD: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DP2ADD\n"); -#endif + if 
(psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DP2ADD\n"); + } psContext->AddIndentation(); TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); bcatcstr(glsl, " = dot(vec2("); @@ -4165,10 +4584,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_POW: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//POW\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//POW\n"); + } psContext->AddIndentation(); TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); bcatcstr(glsl, " = pow(abs("); @@ -4181,12 +4601,13 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals case OPCODE_IMM_ATOMIC_ALLOC: { -#ifdef _DEBUG + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_ALLOC\n"); + } psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_ALLOC\n"); -#endif - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, psInst->ui32PreciseMask, &numParenthesis); if (isVulkan || avoidAtomicCounter) bcatcstr(glsl, "atomicAdd("); else @@ -4202,12 +4623,13 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_IMM_ATOMIC_CONSUME: { -#ifdef _DEBUG + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_CONSUME\n"); + } psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_CONSUME\n"); -#endif - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, psInst->ui32PreciseMask, &numParenthesis); if (isVulkan || 
avoidAtomicCounter) bcatcstr(glsl, "(atomicAdd("); else @@ -4224,17 +4646,18 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals case OPCODE_NOT: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//NOT\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//NOT\n"); + } // Adreno 3xx fails on ~a with "Internal compiler error: unexpected operator", use op_not instead if (!HaveNativeBitwiseOps(psContext->psShader->eTargetLanguage) || psContext->psShader->eTargetLanguage == LANG_ES_300) { UseExtraFunctionDependency("op_not"); psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), psInst->ui32PreciseMask, &numParenthesis); bcatcstr(glsl, "op_not("); numParenthesis++; TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); @@ -4243,9 +4666,10 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals else { psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), psInst->ui32PreciseMask, &numParenthesis); - bcatcstr(glsl, "~"); + bcatcstr(glsl, "~("); + numParenthesis++; TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); AddAssignPrologue(numParenthesis); } @@ -4253,10 +4677,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_XOR: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//XOR\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + 
bcatcstr(glsl, "//XOR\n"); + } CallBinaryOp("^", psInst, 0, 1, 2, SVT_UINT); break; } @@ -4265,10 +4690,11 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals uint32_t destElem; uint32_t mask = psInst->asOperands[0].GetAccessMask(); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RESINFO\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RESINFO\n"); + } for (destElem = 0; destElem < 4; ++destElem) { @@ -4280,12 +4706,13 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_BUFINFO: { -#ifdef _DEBUG + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BUFINFO\n"); + } psContext->AddIndentation(); - bcatcstr(glsl, "//BUFINFO\n"); -#endif - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, 1, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, 1, psInst->ui32PreciseMask, &numParenthesis); TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY); bcatcstr(glsl, "_buf.length()"); AddAssignPrologue(numParenthesis); @@ -4293,13 +4720,14 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals } case OPCODE_SAMPLE_INFO: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_INFO\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_INFO\n"); + } const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT ? SVT_FLOAT : SVT_UINT, 1, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT ? 
SVT_FLOAT : SVT_UINT, 1, psInst->ui32PreciseMask, &numParenthesis); bcatcstr(glsl, "textureSamples("); std::string texName = ResourceName(psContext, RGROUP_TEXTURE, psInst->asOperands[1].ui32RegisterNumber, 0); if (psContext->IsVulkan()) @@ -4357,7 +4785,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals { const bool generateWorkaround = (i == 0); psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, dstCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, dstCount, psInst->ui32PreciseMask, &numParenthesis); bcatcstr(glsl, generateWorkaround ? "min(max(" : "clamp("); TranslateOperand(&psInst->asOperands[0], TO_AUTO_BITCAST_TO_FLOAT); bcatcstr(glsl, generateWorkaround ? ", 0.0), 1.0)" : ", 0.0, 1.0)"); diff --git a/src/toGLSLOperand.cpp b/src/toGLSLOperand.cpp index 40f163c..55b8db5 100644 --- a/src/toGLSLOperand.cpp +++ b/src/toGLSLOperand.cpp @@ -324,10 +324,11 @@ void ToGLSL::TranslateOperandIndexMAD(const Operand* psOperand, int index, uint3 } } -static std::string GetBitcastOp(HLSLCrossCompilerContext *psContext, SHADER_VARIABLE_TYPE from, SHADER_VARIABLE_TYPE to, uint32_t numComponents) +static std::string GetBitcastOp(HLSLCrossCompilerContext *psContext, SHADER_VARIABLE_TYPE from, SHADER_VARIABLE_TYPE to, uint32_t numComponents, bool &needsBitcastOp) { if (psContext->psShader->eTargetLanguage == LANG_METAL) { + needsBitcastOp = false; std::ostringstream oss; oss << "as_type<"; oss << GetConstructorForTypeMetal(to, numComponents); @@ -336,6 +337,7 @@ static std::string GetBitcastOp(HLSLCrossCompilerContext *psContext, SHADER_VARI } else { + needsBitcastOp = true; if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_INT) return "intBitsToFloat"; else if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_UINT) @@ -351,9 +353,8 @@ static std::string GetBitcastOp(HLSLCrossCompilerContext *psContext, SHADER_VARI } // Helper 
function to print out a single 32-bit immediate value in desired format -static void printImmediate32(HLSLCrossCompilerContext *psContext, uint32_t value, SHADER_VARIABLE_TYPE eType) +static void printImmediate32(HLSLCrossCompilerContext *psContext, bstring glsl, uint32_t value, SHADER_VARIABLE_TYPE eType) { - bstring glsl = *psContext->currentGLSLString; int needsParenthesis = 0; // Print floats as bit patterns. @@ -379,12 +380,10 @@ static void printImmediate32(HLSLCrossCompilerContext *psContext, uint32_t value if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) bformata(glsl, "int(0x%Xu)", value); else - bformata(glsl, "0x%X", value); + bformata(glsl, "%d", value); } - else if (value <= 1024) // Print anything below 1024 as decimal, and hex after that - bformata(glsl, "%d", value); else - bformata(glsl, "0x%X", value); + bformata(glsl, "%d", value); break; case SVT_UINT: case SVT_UINT16: @@ -409,9 +408,9 @@ static void printImmediate32(HLSLCrossCompilerContext *psContext, uint32_t value bcatcstr(glsl, ")"); } -void ToGLSL::TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase) +void ToGLSL::TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion) { - TranslateVariableNameWithMask(*psContext->currentGLSLString, psOperand, ui32TOFlag, pui32IgnoreSwizzle, ui32CompMask, piRebase); + TranslateVariableNameWithMask(*psContext->currentGLSLString, psOperand, ui32TOFlag, pui32IgnoreSwizzle, ui32CompMask, piRebase, forceNoConversion); } void ToGLSL::DeclareDynamicIndexWrapper(const struct ShaderVarType* psType) @@ -494,7 +493,7 @@ void ToGLSL::DeclareDynamicIndexWrapper(const char* psName, SHADER_VARIABLE_CLAS m_FunctionDefinitionsOrder.push_back(psName); } -void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, 
uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase) +void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion) { int numParenthesis = 0; int hasCtor = 0; @@ -566,6 +565,8 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan requestedComponents = std::max(requestedComponents, numComponents); + bool needsBitcastOp = false; + if (!(ui32TOFlag & (TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY | TO_FLAG_DECLARATION_NAME))) { if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64) @@ -582,7 +583,7 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan if (CanDoDirectCast(psContext, eType, requestedType) || !HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) { hasCtor = 1; - if (eType == SVT_BOOL) + if (eType == SVT_BOOL && !forceNoConversion) { needsBoolUpscale = 1; // make sure to wrap the whole thing in parens so the upscale @@ -590,13 +591,24 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan bcatcstr(glsl, "("); numParenthesis++; } + + // case 1154828: In case of OPERAND_TYPE_INPUT_PRIMITIVEID we end up here with requestedComponents == 0, GetConstructorForType below would return empty string and we miss the cast to uint + if (requestedComponents < 1) + requestedComponents = 1; + bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false)); numParenthesis++; } else { // Direct cast not possible, need to do bitcast. 
- bformata(glsl, "%s(", GetBitcastOp(psContext, eType, requestedType, requestedComponents).c_str()); + if (IsESLanguage(psContext->psShader->eTargetLanguage) && (requestedType == SVT_UINT)) + { + // without explicit cast Adreno may treat the return type of floatBitsToUint as signed int (case 1256567) + bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false)); + numParenthesis++; + } + bformata(glsl, "%s(", GetBitcastOp(psContext, eType, requestedType, requestedComponents, /*out*/ needsBitcastOp).c_str()); numParenthesis++; } } @@ -619,7 +631,7 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan { if (psOperand->iNumComponents == 1) { - printImmediate32(psContext, *((unsigned int*)(&psOperand->afImmediates[0])), requestedType); + printImmediate32(psContext, glsl, *((unsigned int*)(&psOperand->afImmediates[0])), requestedType); } else { @@ -640,7 +652,7 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan if (firstItemAdded) bcatcstr(glsl, ", "); uval = *((uint32_t*)(&psOperand->afImmediates[i >= psOperand->iNumComponents ? 
psOperand->iNumComponents - 1 : i])); - printImmediate32(psContext, uval, requestedType); + printImmediate32(psContext, glsl, uval, requestedType); firstItemAdded = 1; } bcatcstr(glsl, ")"); @@ -682,7 +694,8 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan if ((psSig->eSystemValueType == NAME_POSITION && psSig->ui32SemanticIndex == 0) || (psSig->semanticName == "POS" && psSig->ui32SemanticIndex == 0) || - (psSig->semanticName == "SV_POSITION" && psSig->ui32SemanticIndex == 0)) + (psSig->semanticName == "SV_POSITION" && psSig->ui32SemanticIndex == 0) || + (psSig->semanticName == "POSITION" && psSig->ui32SemanticIndex == 0)) { bcatcstr(glsl, "gl_in"); TranslateOperandIndex(psOperand, 0);//Vertex index @@ -752,7 +765,16 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan { int stream = 0; std::string name = psContext->GetDeclaredOutputName(psOperand, &stream, pui32IgnoreSwizzle, piRebase, 0); + + // If we are writing out to built in type then we need to redirect tot he built in arrays + // this is safe to do as HLSL enforces 1:1 mapping, so output maps to gl_InvocationID by default + if (name == "gl_Position" && psContext->psShader->eShaderType == HULL_SHADER) + { + bcatcstr(glsl, "gl_out[gl_InvocationID]."); + } + bcatcstr(glsl, name.c_str()); + if (psOperand->m_SubOperands[0].get()) { bcatcstr(glsl, "["); @@ -1327,7 +1349,26 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan case OPERAND_TYPE_NULL: { // Null register, used to discard results of operations - bcatcstr(glsl, "//null"); + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + { + // On ES2 we can pass this as an argument to a function, e.g. fake integer operations that we do. See case 1124159. 
+ bcatcstr(glsl, "null"); + bool alreadyDeclared = false; + std::string toDeclare = "vec4 null;"; + for (size_t i = 0; i < m_AdditionalDefinitions.size(); ++i) + { + if (toDeclare == m_AdditionalDefinitions[i]) + { + alreadyDeclared = true; + break; + } + } + + if (!alreadyDeclared) + m_AdditionalDefinitions.push_back(toDeclare); + } + else + bcatcstr(glsl, "//null"); break; } case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: @@ -1564,6 +1605,13 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan *pui32IgnoreSwizzle = 1; } + if (needsBitcastOp && (*pui32IgnoreSwizzle == 0)) + { + // some glsl compilers (Switch's GLSLc) emit warnings "u_xlat.w uninitialized" if generated code looks like: "floatBitsToUint(u_xlat).xz". Instead, generate: "floatBitsToUint(u_xlat.xz)" + TranslateOperandSwizzleWithMask(glsl, psContext, psOperand, ui32CompMask, piRebase ? *piRebase : 0); + *pui32IgnoreSwizzle = 1; + } + if (needsBoolUpscale) { if (requestedType == SVT_UINT || requestedType == SVT_UINT16 || requestedType == SVT_UINT8) @@ -1573,7 +1621,7 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) bcatcstr(glsl, ") * int(0xffffffffu)"); else - bcatcstr(glsl, ") * int(0xffff)"); // GLSL ES 2 spec: high precision ints are guaranteed to have a range of (-2^16, 2^16) + bcatcstr(glsl, ") * -1"); // GLSL ES 2 spec: high precision ints are guaranteed to have a range of (-2^16, 2^16) } numParenthesis--; @@ -1588,12 +1636,12 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan } } -void ToGLSL::TranslateOperand(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask) +void ToGLSL::TranslateOperand(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask, bool forceNoConversion) { - TranslateOperand(*psContext->currentGLSLString, psOperand, ui32TOFlag, ui32ComponentMask); + 
TranslateOperand(*psContext->currentGLSLString, psOperand, ui32TOFlag, ui32ComponentMask, forceNoConversion); } -void ToGLSL::TranslateOperand(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask) +void ToGLSL::TranslateOperand(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask, bool forceNoConversion) { uint32_t ui32IgnoreSwizzle = 0; int iRebase = 0; @@ -1615,7 +1663,7 @@ void ToGLSL::TranslateOperand(bstring glsl, const Operand* psOperand, uint32_t u if (ui32TOFlag & TO_FLAG_NAME_ONLY) { - TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, OPERAND_4_COMPONENT_MASK_ALL, &iRebase); + TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, OPERAND_4_COMPONENT_MASK_ALL, &iRebase, forceNoConversion); return; } @@ -1642,7 +1690,7 @@ void ToGLSL::TranslateOperand(bstring glsl, const Operand* psOperand, uint32_t u } } - TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, ui32ComponentMask, &iRebase); + TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, ui32ComponentMask, &iRebase, forceNoConversion); if (psContext->psShader->eShaderType == HULL_SHADER && psOperand->eType == OPERAND_TYPE_OUTPUT && psOperand->ui32RegisterNumber != 0 && psOperand->iArrayElements != 0 && psOperand->eIndexRep[0] != OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE diff --git a/src/toMetal.cpp b/src/toMetal.cpp index b695a77..d66f55e 100644 --- a/src/toMetal.cpp +++ b/src/toMetal.cpp @@ -68,7 +68,10 @@ static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext) const ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; psContext->AddIndentation(); - bformata(glsl, "%s%s%d = %scp[controlPointID].%s%d;\n", psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + if 
((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0) + bformata(glsl, "%s%s = %scp[controlPointID].%s;\n", psContext->outputPrefix, "mtl_Position", psContext->inputPrefix, "mtl_Position"); + else + bformata(glsl, "%s%s%d = %scp[controlPointID].%s%d;\n", psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); } } @@ -85,6 +88,8 @@ bool ToMetal::Translate() psShader->ExpandSWAPCs(); psShader->ForcePositionToHighp(); psShader->AnalyzeIOOverlap(); + if ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0) + psShader->SetMaxSemanticIndex(); psShader->FindUnusedGlobals(psContext->flags); psContext->indent = 0; @@ -136,7 +141,7 @@ bool ToMetal::Translate() ShaderPhase &phase = psShader->asPhases[ui32Phase]; phase.UnvectorizeImmMoves(); psContext->DoDataTypeAnalysis(&phase); - phase.ResolveUAVProperties(); + phase.ResolveUAVProperties(psShader->sInfo); ReserveUAVBindingSlots(&phase); // TODO: unify slot allocation code between gl/metal/vulkan HLSLcc::DoLoopTransform(psContext, phase); } @@ -189,9 +194,10 @@ bool ToMetal::Translate() continue; psContext->currentPhase = ui32Phase; -#ifdef _DEBUG - // bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase)); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + // bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase)); + } for (i = 0; i < psPhase->psDecl.size(); ++i) { TranslateDeclaration(&psPhase->psDecl[i]); @@ -205,9 +211,13 @@ bool ToMetal::Translate() } else { + psContext->indent++; + for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i) TranslateDeclaration(&psShader->asPhases[0].psDecl[i]); + psContext->indent--; + // Output default implementations for framebuffer index remap if needed if (m_NeedFBOutputRemapDecl) bcatcstr(glsl, "#ifndef XLT_REMAP_O\n\t#define XLT_REMAP_O {0, 1, 2, 3, 
4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_o[] = XLT_REMAP_O;\n"); @@ -383,6 +393,13 @@ bool ToMetal::Translate() else if (psShader->eShaderType == HULL_SHADER) mem.second.assign("// mtl_InstanceID passed through groupID"); } + else if (mem.first == "mtl_BaseInstance") + { + if (psShader->eShaderType == VERTEX_SHADER) + mem.second.assign("uint mtl_BaseInstance"); + else if (psShader->eShaderType == HULL_SHADER) + mem.second.assign("// mtl_BaseInstance ignored"); + } else if (mem.first == "mtl_VertexID") { if (psShader->eShaderType == VERTEX_SHADER) @@ -392,6 +409,15 @@ bool ToMetal::Translate() else if (psShader->eShaderType == DOMAIN_SHADER) mem.second.assign("// mtl_VertexID unused"); } + else if (mem.first == "mtl_BaseVertex") + { + if (psShader->eShaderType == VERTEX_SHADER) + mem.second.assign("uint mtl_BaseVertex"); + else if (psShader->eShaderType == HULL_SHADER) + mem.second.assign("// mtl_BaseVertex generated in compute kernel"); + else if (psShader->eShaderType == DOMAIN_SHADER) + mem.second.assign("// mtl_BaseVertex unused"); + } }); } @@ -467,6 +493,23 @@ bool ToMetal::Translate() bcatcstr(bodyglsl, ",\n"); } + // Figure and declare counters and their binds (we also postponed buffer reflection until now) + for (auto it = m_BufferReflections.begin(); it != m_BufferReflections.end(); ++it) + { + uint32_t bind = it->second.bind; + if (it->second.hasCounter) + { + const uint32_t counterBind = m_BufferSlots.PeekFirstFreeSlot(); + m_BufferSlots.ReserveBindingSlot(counterBind, BindingSlotAllocator::UAV); + + bformata(bodyglsl, ",\n\t\tdevice atomic_uint* %s_counter [[ buffer(%d) ]]", it->first.c_str(), counterBind); + + // Offset with 1 so we can capture counters that are bound to slot 0 (if, say, user decides to start buffers at register 1 or higher) + bind |= ((counterBind + 1) << 16); + } + psContext->m_Reflection.OnBufferBinding(it->first, bind, it->second.isUAV); + } + bcatcstr(bodyglsl, ")\n{\n"); if (popPragmaDiagnostic) @@ -474,6 +517,33 @@ 
bool ToMetal::Translate() if (psShader->eShaderType != COMPUTE_SHADER) { + if (psShader->eShaderType == VERTEX_SHADER) + { + // Fix HLSL compatibility with DrawProceduralIndirect, SV_InstanceID always starts at 0 but with Metal, a base instance was not subtracted for equal behavior + // Base semantics available everywhere starting with iOS9 (except hardware limitation exists with the original Apple A7/A8 GPUs, causing UNITY_SUPPORT_INDIRECT_BUFFERS=0) + std::for_each(m_StructDefinitions[""].m_Members.begin(), m_StructDefinitions[""].m_Members.end(), [&](MemberDefinitions::value_type &mem) + { + if (mem.first == "mtl_InstanceID") + { + bcatcstr(bodyglsl, "#if !UNITY_SUPPORT_INDIRECT_BUFFERS\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "mtl_BaseInstance = 0;\n"); + bcatcstr(bodyglsl, "#endif\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "mtl_InstanceID = mtl_InstanceID - mtl_BaseInstance;\n"); + } + else if (mem.first == "mtl_VertexID") + { + bcatcstr(bodyglsl, "#if !UNITY_SUPPORT_INDIRECT_BUFFERS\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "mtl_BaseVertex = 0;\n"); + bcatcstr(bodyglsl, "#endif\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "mtl_VertexID = mtl_VertexID - mtl_BaseVertex;\n"); + } + }); + } + if (m_StructDefinitions[GetOutputStructName().c_str()].m_Members.size() > 0) { psContext->AddIndentation(); @@ -498,7 +568,9 @@ bool ToMetal::Translate() bcatcstr(bodyglsl, "const bool patchValid = (patchID < patchInfo.numPatches);\n"); psContext->AddIndentation(); - bcatcstr(bodyglsl, "const uint mtl_InstanceID = groupID.y;\n"); + bcatcstr(bodyglsl, "const uint mtl_BaseInstance = 0;\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint mtl_InstanceID = groupID.y - mtl_BaseInstance;\n"); psContext->AddIndentation(); bcatcstr(bodyglsl, "const uint internalPatchID = mtl_InstanceID * patchInfo.numPatches + patchID;\n"); psContext->AddIndentation(); @@ -507,7 +579,9 @@ bool ToMetal::Translate() 
psContext->AddIndentation(); bcatcstr(bodyglsl, "const uint controlPointID = (tID.x % patchInfo.numControlPointsPerPatch);\n"); psContext->AddIndentation(); - bcatcstr(bodyglsl, "const uint mtl_VertexID = (mtl_InstanceID * (patchInfo.numControlPointsPerPatch * patchInfo.numPatches)) + tID.x;\n"); + bcatcstr(bodyglsl, "const uint mtl_BaseVertex = 0;\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint mtl_VertexID = ((mtl_InstanceID * (patchInfo.numControlPointsPerPatch * patchInfo.numPatches)) + tID.x) - mtl_BaseVertex;\n"); psContext->AddIndentation(); bformata(bodyglsl, "threadgroup %s inputGroup[numPatchesInThreadGroup];\n", GetInputStructName().c_str()); @@ -563,15 +637,19 @@ bool ToMetal::Translate() if (psPhase->earlyMain->slen > 1) { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- Start Early Main ---\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- Start Early Main ---\n"); + } + bconcat(bodyglsl, psPhase->earlyMain); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- End Early Main ---\n"); -#endif + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- End Early Main ---\n"); + } } psContext->AddIndentation(); @@ -618,15 +696,19 @@ bool ToMetal::Translate() if (psPhase->hasPostShaderCode) { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- Post shader code ---\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- Post shader code ---\n"); + } + bconcat(bodyglsl, psPhase->postShaderCode); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- End post shader code ---\n"); -#endif + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + 
bcatcstr(bodyglsl, "//--- End post shader code ---\n"); + } } if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) @@ -676,15 +758,19 @@ bool ToMetal::Translate() { if (psContext->psShader->asPhases[0].earlyMain->slen > 1) { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- Start Early Main ---\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- Start Early Main ---\n"); + } + bconcat(bodyglsl, psContext->psShader->asPhases[0].earlyMain); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- End Early Main ---\n"); -#endif + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- End Early Main ---\n"); + } } for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i) diff --git a/src/toMetalDeclaration.cpp b/src/toMetalDeclaration.cpp index 1087a5d..73a0cd0 100644 --- a/src/toMetalDeclaration.cpp +++ b/src/toMetalDeclaration.cpp @@ -2,6 +2,8 @@ #include "internal_includes/debug.h" #include "internal_includes/HLSLccToolkit.h" #include "internal_includes/Declaration.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "internal_includes/languages.h" #include #include #include @@ -19,49 +21,49 @@ using namespace HLSLcc; bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix, int *iIgnoreRedirect) { - if (psContext->psShader->eShaderType == HULL_SHADER && sig && sig->semanticName == "SV_TessFactor") - { - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - ASSERT(sig->ui32SemanticIndex <= 3); - std::ostringstream oss; - oss << "tessFactor.edgeTessellationFactor[" << sig->ui32SemanticIndex << "]"; - result = oss.str(); - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (iIgnoreRedirect 
!= NULL) *iIgnoreRedirect = 1; - return true; - } - - if (psContext->psShader->eShaderType == HULL_SHADER && sig && sig->semanticName == "SV_InsideTessFactor") - { - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - ASSERT(sig->ui32SemanticIndex <= 1); - std::ostringstream oss; - oss << "tessFactor.insideTessellationFactor"; - if (psContext->psShader->sInfo.eTessDomain != TESSELLATOR_DOMAIN_TRI) - oss << "[" << sig->ui32SemanticIndex << "]"; - result = oss.str(); - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; - return true; - } - - if (sig && sig->semanticName == "SV_InstanceID") - { - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - } - - if (sig && ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) && - ((psContext->psShader->eShaderType == VERTEX_SHADER && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) == 0))) - { - result = "mtl_Position"; - return true; - } - if (sig) { + if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_TessFactor") + { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + ASSERT(sig->ui32SemanticIndex <= 3); + std::ostringstream oss; + oss << "tessFactor.edgeTessellationFactor[" << sig->ui32SemanticIndex << "]"; + result = oss.str(); + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; + return true; + } + + if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_InsideTessFactor") + { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + ASSERT(sig->ui32SemanticIndex <= 1); + std::ostringstream oss; + oss << "tessFactor.insideTessellationFactor"; + if (psContext->psShader->sInfo.eTessDomain != TESSELLATOR_DOMAIN_TRI) + oss << "[" << sig->ui32SemanticIndex << "]"; + result = oss.str(); + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; + return true; + } + + if 
(sig->semanticName == "SV_InstanceID") + { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + } + + if (((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) && + ((psContext->psShader->eShaderType == VERTEX_SHADER && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) == 0))) + { + result = "mtl_Position"; + return true; + } + switch (sig->eSystemValueType) { case NAME_POSITION: @@ -120,15 +122,15 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I default: break; } - } - if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE || - psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_FORK_PHASE) - { - std::ostringstream oss; - oss << sig->semanticName << sig->ui32SemanticIndex; - result = oss.str(); - return true; + if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE || + psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_FORK_PHASE) + { + std::ostringstream oss; + oss << sig->semanticName << sig->ui32SemanticIndex; + result = oss.str(); + return true; + } } switch (psOperand->eType) @@ -176,7 +178,7 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I case OPERAND_TYPE_INPUT: { std::ostringstream oss; - ASSERT(sig != NULL); + ASSERT(sig != nullptr); oss << sig->semanticName << sig->ui32SemanticIndex; result = oss.str(); if (HLSLcc::WriteMaskToComponentCount(sig->ui32Mask) == 1 && pui32IgnoreSwizzle != NULL) @@ -186,6 +188,7 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I case OPERAND_TYPE_INPUT_PATCH_CONSTANT: { std::ostringstream oss; + ASSERT(sig != nullptr); oss << sig->semanticName << sig->ui32SemanticIndex; result = oss.str(); if (outSkipPrefix != NULL) *outSkipPrefix = true; @@ -194,6 +197,7 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I case 
OPERAND_TYPE_INPUT_CONTROL_POINT: { std::ostringstream oss; + ASSERT(sig != nullptr); oss << sig->semanticName << sig->ui32SemanticIndex; result = oss.str(); if (outSkipPrefix != NULL) *outSkipPrefix = true; @@ -242,6 +246,7 @@ void ToMetal::DeclareBuiltinInput(const Declaration *psDecl) break; case NAME_INSTANCE_ID: m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_InstanceID", "uint mtl_InstanceID [[ instance_id ]]")); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_BaseInstance", "uint mtl_BaseInstance [[ base_instance ]]")); // Requires Metal runtime 1.1+ break; case NAME_IS_FRONT_FACE: m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_FrontFace", "bool mtl_FrontFace [[ front_facing ]]")); @@ -251,6 +256,7 @@ void ToMetal::DeclareBuiltinInput(const Declaration *psDecl) break; case NAME_VERTEX_ID: m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_VertexID", "uint mtl_VertexID [[ vertex_id ]]")); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_BaseVertex", "uint mtl_BaseVertex [[ base_vertex ]]")); // Requires Metal runtime 1.1+ break; case NAME_PRIMITIVE_ID: // Not on Metal @@ -345,7 +351,6 @@ void ToMetal::DeclareBuiltinOutput(const Declaration *psDecl) m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Position", "float4 mtl_Position [[ position ]]")); break; case NAME_RENDER_TARGET_ARRAY_INDEX: - // Only supported on a Mac m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Layer", "uint mtl_Layer [[ render_target_array_index ]]")); break; case NAME_CLIP_DISTANCE: @@ -404,6 +409,8 @@ void ToMetal::DeclareBuiltinOutput(const Declaration *psDecl) ASSERT(0); // Wut break; } + + psContext->m_Reflection.OnBuiltinOutput(psDecl->asOperands[0].eSpecialName); } static std::string BuildOperandTypeString(OPERAND_MIN_PRECISION ePrec, INOUT_COMPONENT_TYPE eType, int numComponents) @@ -467,6 +474,9 @@ void ToMetal::DeclareHullShaderPassthrough() name = oss.str(); } + if 
((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0) + name = "mtl_Position"; + uint32_t ui32NumComponents = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); std::string typeName = BuildOperandTypeString(OPERAND_MIN_PRECISION_DEFAULT, psSig->eComponentType, ui32NumComponents); @@ -488,7 +498,8 @@ void ToMetal::DeclareHullShaderPassthrough() oss << typeName << " " << name; // VERTEX_SHADER hardcoded on purpose - uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true); + bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); + uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true, keepLocation, psContext->psShader->maxSemanticIndex); oss << " [[ " << "attribute(" << loc << ")" << " ]] "; psContext->m_Reflection.OnInputBinding(name, loc); @@ -717,9 +728,6 @@ static std::string TranslateResourceDeclaration(HLSLCrossCompilerContext* psCont if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) != 0) { access = "write"; - if (psContext->psShader->eShaderType != COMPUTE_SHADER) - psContext->m_Reflection.OnDiagnostics("This shader might not work on all Metal devices because of texture writes on non-compute shaders.", 0, false); - if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ) != 0) { access = "read_write"; @@ -1099,11 +1107,6 @@ void ToMetal::DeclareBufferVariable(const Declaration *psDecl, bool isRaw, bool BufConst = "const "; oss << BufConst; } - else - { - if (psContext->psShader->eShaderType != COMPUTE_SHADER) - psContext->m_Reflection.OnDiagnostics("This shader might not work on all Metal devices because of buffer writes on non-compute shaders.", 0, false); - } if (isRaw) oss << "device uint *" << BufName; @@ -1114,23 +1117,12 @@ void ToMetal::DeclareBufferVariable(const Declaration *psDecl, bool isRaw, bool oss << " [[ buffer(" << loc << ") ]]"; m_StructDefinitions[""].m_Members.push_back(std::make_pair(BufName, 
oss.str())); - psContext->m_Reflection.OnBufferBinding(BufName, loc, isUAV); + + // We don't do REAL reflection here, we need to collect all data and figure out if we're dealing with counters. + // And if so - we need to patch counter binding info, add counters to empty slots, etc + const BufferReflection br = { loc, isUAV, psDecl->sUAV.bCounter != 0 }; + m_BufferReflections.insert(std::make_pair(BufName, br)); } - - // In addition to the actual declaration, we need pointer modification and possible counter declaration - // in early main: - - // Possible counter is always in the beginning of the buffer - if (isUAV && psDecl->sUAV.bCounter) - { - bformata(GetEarlyMain(psContext), "device atomic_uint *%s_counter = reinterpret_cast (%s);\n", BufName.c_str(), BufName.c_str()); - } - - // Some GPUs don't allow memory access below buffer binding offset in the shader so always bind compute buffer - // at offset 0 instead of GetDataOffset(). - // We can't tell at shader compile time if the buffer actually has counter or not. Therefore we'll always reserve - // space for the counter and bump the data pointer to beginning of the actual data here. - bformata(GetEarlyMain(psContext), "%s = reinterpret_cast<%sdevice %s *> (reinterpret_cast (%s) + 1);\n", BufName.c_str(), BufConst.c_str(), (isRaw ? "uint" : BufType.c_str()), BufConst.c_str(), BufName.c_str()); } static int ParseInlineSamplerWrapMode(const std::string& samplerName, const std::string& wrapName) @@ -1185,6 +1177,11 @@ static bool EmitInlineSampler(HLSLCrossCompilerContext* psContext, const std::st return false; } + // Starting with macOS 11/iOS 14, the metal compiler will warn about unused inline samplers, that might + // happen on mobile due to _mtl_xl_shadow_sampler workaround that's required for pre-GPUFamily3. 
+ if (hasCompare && IsMobileTarget(psContext)) + return true; + bstring str = GetEarlyMain(psContext); bformata(str, "constexpr sampler %s(", name.c_str()); @@ -1194,7 +1191,7 @@ static bool EmitInlineSampler(HLSLCrossCompilerContext* psContext, const std::st if (hasTrilinear) bformata(str, "filter::linear,mip_filter::linear,"); else if (hasLinear) - bformata(str, "filter::linear,"); + bformata(str, "filter::linear,mip_filter::nearest,"); else bformata(str, "filter::nearest,"); @@ -1276,7 +1273,7 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) } //Already declared as part of an array. - if (psShader->aIndexedInput[psOperand->GetRegisterSpace(psContext)][psDecl->asOperands[0].ui32RegisterNumber] == -1) + if (psDecl->eOpcode == OPCODE_DCL_INPUT && psShader->aIndexedInput[psOperand->GetRegisterSpace(psContext)][psDecl->asOperands[0].ui32RegisterNumber] == -1) { break; } @@ -1401,7 +1398,8 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl) { std::ostringstream oss; // VERTEX_SHADER hardcoded on purpose - uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true); + bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); + uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true, keepLocation, psShader->maxSemanticIndex); oss << "attribute(" << loc << ")"; semantic = oss.str(); psContext->m_Reflection.OnInputBinding(name, loc); @@ -2391,6 +2389,7 @@ void ToMetal::DeclareOutput(const Declaration *psDecl) oss << type << " " << name << " [[ color(xlt_remap_o[" << psSignature->ui32SemanticIndex << "]) ]]"; m_NeedFBOutputRemapDecl = true; m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); + psContext->m_Reflection.OnFragmentOutputDeclaration(iNumComponents, psSignature->ui32SemanticIndex); } } break; @@ -2412,6 +2411,9 @@ void ToMetal::DeclareOutput(const Declaration *psDecl) oss << " [[ user(" << name << ") ]]"; 
m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, oss.str())); + if (psContext->psShader->eShaderType == VERTEX_SHADER) + psContext->m_Reflection.OnVertexProgramOutput(name, psSignature->semanticName, psSignature->ui32SemanticIndex); + // For preserving data layout, declare output struct as domain shader input, too if (psContext->psShader->eShaderType == HULL_SHADER) { @@ -2421,7 +2423,8 @@ void ToMetal::DeclareOutput(const Declaration *psDecl) oss << type << " " << name; // VERTEX_SHADER hardcoded on purpose - uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true); + bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); + uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true, keepLocation, psContext->psShader->maxSemanticIndex); oss << " [[ " << "attribute(" << loc << ")" << " ]] "; psContext->m_Reflection.OnInputBinding(name, loc); @@ -2439,7 +2442,8 @@ void ToMetal::DeclareOutput(const Declaration *psDecl) void ToMetal::EnsureShadowSamplerDeclared() { - if (m_ShadowSamplerDeclared) + // on macos we will set comparison func from the app side + if (m_ShadowSamplerDeclared || !IsMobileTarget(psContext)) return; if ((psContext->flags & HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR) != 0 || (psContext->psShader->eShaderType == COMPUTE_SHADER)) diff --git a/src/toMetalInstruction.cpp b/src/toMetalInstruction.cpp index f00fe7b..a4c0645 100644 --- a/src/toMetalInstruction.cpp +++ b/src/toMetalInstruction.cpp @@ -20,128 +20,68 @@ bstring operator<<(bstring a, const std::string &b) return a; } +static void AddOpAssignToDest(bstring glsl, SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, SHADER_VARIABLE_TYPE eDestType, uint32_t ui32DestElementCount, uint32_t precise, int& numParenthesis, bool allowReinterpretCast = true) +{ + numParenthesis = 0; + + // Find out from type the precisions and types without precision + RESOURCE_RETURN_TYPE srcBareType = 
SVTTypeToResourceReturnType(eSrcType); + RESOURCE_RETURN_TYPE dstBareType = SVTTypeToResourceReturnType(eDestType); + REFLECT_RESOURCE_PRECISION srcPrec = SVTTypeToPrecision(eSrcType); + REFLECT_RESOURCE_PRECISION dstPrec = SVTTypeToPrecision(eDestType); + + // Add assigment + bcatcstr(glsl, " = "); + + /* TODO: implement precise for metal + if (precise) + { + bcatcstr(glsl, "u_xlat_precise("); + numParenthesis++; + }*/ + + // Special reinterpret cast between float<->uint/int if size matches + // TODO: Handle bools? + if (srcBareType != dstBareType && (srcBareType == RETURN_TYPE_FLOAT || dstBareType == RETURN_TYPE_FLOAT) && srcPrec == dstPrec && allowReinterpretCast) + { + bformata(glsl, "as_type<%s>(", GetConstructorForTypeMetal(eDestType, ui32DestElementCount)); + numParenthesis++; + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeMetal(eSrcType, ui32DestElementCount)); + numParenthesis++; + } + return; + } + + // Do cast in case of type missmatch or dimension + if (eSrcType != eDestType || ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeMetal(eDestType, ui32DestElementCount)); + numParenthesis++; + return; + } +} + // This function prints out the destination name, possible destination writemask, assignment operator // and any possible conversions needed based on the eSrcType+ui32SrcElementCount (type and size of data expected to be coming in) // As an output, pNeedsParenthesis will be filled with the amount of closing parenthesis needed // and pSrcCount will be filled with the number of components expected // ui32CompMask can be used to only write to 1 or more components (used by MOVC) void ToMetal::AddOpAssignToDestWithMask(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask) + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int& 
numParenthesis, uint32_t ui32CompMask) { uint32_t ui32DestElementCount = psDest->GetNumSwizzleElements(ui32CompMask); bstring glsl = *psContext->currentGLSLString; - SHADER_VARIABLE_TYPE eDestDataType = psDest->GetDataType(psContext); - ASSERT(pNeedsParenthesis != NULL); - - *pNeedsParenthesis = 0; - + SHADER_VARIABLE_TYPE eDestType = psDest->GetDataType(psContext); glsl << TranslateOperand(psDest, TO_FLAG_DESTINATION, ui32CompMask); - - // Simple path: types match. - if (eDestDataType == eSrcType) - { - // Cover cases where the HLSL language expects the rest of the components to be default-filled - // eg. MOV r0, c0.x => Temp[0] = vec4(c0.x); - if (ui32DestElementCount > ui32SrcElementCount) - { - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); - *pNeedsParenthesis = 1; - } - else - bformata(glsl, " %s ", szAssignmentOp); - return; - } - // Up/downscaling with cast. The monster of condition there checks if the underlying datatypes are the same, just with prec differences - if (((eDestDataType == SVT_FLOAT || eDestDataType == SVT_FLOAT16 || eDestDataType == SVT_FLOAT10) && (eSrcType == SVT_FLOAT || eSrcType == SVT_FLOAT16 || eSrcType == SVT_FLOAT10)) - || ((eDestDataType == SVT_INT || eDestDataType == SVT_INT16 || eDestDataType == SVT_INT12) && (eSrcType == SVT_INT || eSrcType == SVT_INT16 || eSrcType == SVT_INT12)) - || ((eDestDataType == SVT_UINT || eDestDataType == SVT_UINT16) && (eSrcType == SVT_UINT || eSrcType == SVT_UINT16))) - { - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); - *pNeedsParenthesis = 1; - return; - } - - switch (eDestDataType) - { - case SVT_INT: - case SVT_INT12: - case SVT_INT16: - // Bitcasts from lower precisions are ambiguous - ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); - if (eSrcType == SVT_FLOAT) - { - if (ui32DestElementCount > 1) - bformata(glsl, " %s as_type(", szAssignmentOp, 
ui32DestElementCount); - else - bformata(glsl, " %s as_type(", szAssignmentOp); - - // Cover cases where the HLSL language expects the rest of the components to be default-filled - if (ui32DestElementCount > ui32SrcElementCount) - { - bformata(glsl, "%s(", GetConstructorForTypeMetal(eSrcType, ui32DestElementCount)); - (*pNeedsParenthesis)++; - } - } - else - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); - - (*pNeedsParenthesis)++; - break; - case SVT_UINT: - case SVT_UINT16: - ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); - if (eSrcType == SVT_FLOAT) - { - if (ui32DestElementCount > 1) - bformata(glsl, " %s as_type(", szAssignmentOp, ui32DestElementCount); - else - bformata(glsl, " %s as_type(", szAssignmentOp); - // Cover cases where the HLSL language expects the rest of the components to be default-filled - if (ui32DestElementCount > ui32SrcElementCount) - { - bformata(glsl, "%s(", GetConstructorForTypeMetal(eSrcType, ui32DestElementCount)); - (*pNeedsParenthesis)++; - } - } - else - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); - - (*pNeedsParenthesis)++; - break; - - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - ASSERT(eSrcType != SVT_INT12 || (eSrcType != SVT_INT16 && eSrcType != SVT_UINT16)); - if (psContext->psShader->ui32MajorVersion > 3) - { - if (ui32DestElementCount > 1) - bformata(glsl, " %s as_type(", szAssignmentOp, ui32DestElementCount); - else - bformata(glsl, " %s as_type(", szAssignmentOp); - // Cover cases where the HLSL language expects the rest of the components to be default-filled - if (ui32DestElementCount > ui32SrcElementCount) - { - bformata(glsl, "%s(", GetConstructorForTypeMetal(eSrcType, ui32DestElementCount)); - (*pNeedsParenthesis)++; - } - } - else - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); - - (*pNeedsParenthesis)++; - 
break; - default: - // TODO: Handle bools? - ASSERT(0); - break; - } + AddOpAssignToDest(glsl, eSrcType, ui32SrcElementCount, eDestType, ui32DestElementCount, precise, numParenthesis, psContext->psShader->ui32MajorVersion > 3); } void ToMetal::AddAssignToDest(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis) + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int& numParenthesis) { - AddOpAssignToDestWithMask(psDest, eSrcType, ui32SrcElementCount, "=", pNeedsParenthesis, OPERAND_4_COMPONENT_MASK_ALL); + AddOpAssignToDestWithMask(psDest, eSrcType, ui32SrcElementCount, precise, numParenthesis, OPERAND_4_COMPONENT_MASK_ALL); } void ToMetal::AddAssignPrologue(int numParenthesis) @@ -199,7 +139,7 @@ void ToMetal::AddComparison(Instruction* psInst, ComparisonType eType, } else { - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, destElemCount, &needsParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, destElemCount, psInst->ui32PreciseMask, needsParenthesis); bcatcstr(glsl, GetConstructorForTypeMetal(SVT_UINT, destElemCount)); bcatcstr(glsl, "("); @@ -236,7 +176,7 @@ void ToMetal::AddComparison(Instruction* psInst, ComparisonType eType, } else { - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, destElemCount, &needsParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, destElemCount, psInst->ui32PreciseMask, needsParenthesis); bcatcstr(glsl, "("); } glsl << TranslateOperand(&psInst->asOperands[1], typeFlag, destMask); @@ -270,7 +210,7 @@ bool ToMetal::CanForceToHalfOperand(const Operand *psOperand) return false; } -void ToMetal::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc) +void ToMetal::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, uint32_t precise) { bstring glsl = *psContext->currentGLSLString; int numParenthesis = 0; @@ -280,13 +220,13 @@ void ToMetal::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc) const SHADER_VARIABLE_TYPE eSrcType = 
pSrc->GetDataType(psContext, pDest->GetDataType(psContext)); uint32_t flags = SVTTypeToFlag(eSrcType); - AddAssignToDest(pDest, eSrcType, srcSwizzleCount, &numParenthesis); + AddAssignToDest(pDest, eSrcType, srcSwizzleCount, precise, numParenthesis); glsl << TranslateOperand(pSrc, flags, writeMask); AddAssignPrologue(numParenthesis); } -void ToMetal::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2) +void ToMetal::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2, uint32_t precise) { bstring glsl = *psContext->currentGLSLString; uint32_t destElemCount = pDest->GetNumSwizzleElements(); @@ -317,7 +257,7 @@ void ToMetal::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand int numParenthesis = 0; SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); psContext->AddIndentation(); - AddAssignToDest(pDest, eDestType, destElemCount, &numParenthesis); + AddAssignToDest(pDest, eDestType, destElemCount, precise, numParenthesis); bcatcstr(glsl, "("); if (s0Type == SVT_UINT || s0Type == SVT_UINT16) glsl << TranslateOperand(src0, TO_AUTO_BITCAST_TO_UINT, OPERAND_4_COMPONENT_MASK_X); @@ -356,7 +296,6 @@ void ToMetal::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand } else { - // TODO: We can actually do this in one op using mix(). 
int srcElem = -1; SHADER_VARIABLE_TYPE dstType = pDest->GetDataType(psContext); SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); @@ -394,7 +333,7 @@ void ToMetal::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand continue; psContext->AddIndentation(); - AddOpAssignToDestWithMask(pDest, eDestType, 1, "=", &numParenthesis, 1 << destElem); + AddOpAssignToDestWithMask(pDest, eDestType, 1, precise, numParenthesis, 1 << destElem); bcatcstr(glsl, "("); if (s0Type == SVT_BOOL) { @@ -465,7 +404,7 @@ void ToMetal::CallBinaryOp(const char* name, Instruction* psInst, psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], eDataType, dstSwizCount, &needsParenthesis); + AddAssignToDest(&psInst->asOperands[dest], eDataType, dstSwizCount, psInst->ui32PreciseMask, needsParenthesis); /* bool s0NeedsUpscaling = false, s1NeedsUpscaling = false; SHADER_VARIABLE_TYPE s0Type = psInst->asOperands[src0].GetDataType(psContext); @@ -508,7 +447,7 @@ void ToMetal::CallTernaryOp(const char* op1, const char* op2, Instruction* psIns psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], TypeFlagsToSVTType(dataType), dstSwizCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], TypeFlagsToSVTType(dataType), dstSwizCount, psInst->ui32PreciseMask, numParenthesis); glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); bformata(glsl, " %s ", op1); @@ -543,7 +482,7 @@ void ToMetal::CallHelper3(const char* name, Instruction* psInst, psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? 
SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, psInst->ui32PreciseMask, numParenthesis); bformata(glsl, "%s(", name); numParenthesis++; @@ -587,7 +526,7 @@ void ToMetal::CallHelper2(const char* name, Instruction* psInst, } psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, isDotProduct ? 1 : dstSwizCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, isDotProduct ? 1 : dstSwizCount, psInst->ui32PreciseMask, numParenthesis); bformata(glsl, "%s(", name); numParenthesis++; @@ -618,7 +557,7 @@ void ToMetal::CallHelper2Int(const char* name, Instruction* psInst, psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, psInst->ui32PreciseMask, numParenthesis); bformata(glsl, "%s(", name); numParenthesis++; @@ -647,7 +586,7 @@ void ToMetal::CallHelper2UInt(const char* name, Instruction* psInst, psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], SVT_UINT, dstSwizCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], SVT_UINT, dstSwizCount, psInst->ui32PreciseMask, numParenthesis); bformata(glsl, "%s(", name); numParenthesis++; @@ -671,7 +610,7 @@ void ToMetal::CallHelper1(const char* name, Instruction* psInst, && CanForceToHalfOperand(&psInst->asOperands[src0])) ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; - AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? 
SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, psInst->ui32PreciseMask, numParenthesis); bformata(glsl, "%s(", name); numParenthesis++; @@ -695,7 +634,7 @@ void ToMetal::CallHelper1Int( psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, psInst->ui32PreciseMask, numParenthesis); bformata(glsl, "%s(", name); numParenthesis++; @@ -710,7 +649,7 @@ void ToMetal::TranslateTexelFetch( { int numParenthesis = 0; psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], psContext->psShader->sInfo.GetTextureDataType(psInst->asOperands[2].ui32RegisterNumber), 4, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], psContext->psShader->sInfo.GetTextureDataType(psInst->asOperands[2].ui32RegisterNumber), 4, psInst->ui32PreciseMask, numParenthesis); glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); bcatcstr(glsl, ".read("); @@ -764,6 +703,11 @@ void ToMetal::TranslateTexelFetch( glsl << TranslateOperand(&psInst->asOperands[3], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); // Sample index break; } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + psContext->m_Reflection.OnDiagnostics("Multisampled texture arrays not supported in Metal (in texel fetch)", 0, true); + return; + } case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: case REFLECT_RESOURCE_DIMENSION_BUFFEREX: @@ -787,7 +731,7 @@ void ToMetal::TranslateTexelFetchOffset( { int numParenthesis = 0; psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], psContext->psShader->sInfo.GetTextureDataType(psInst->asOperands[2].ui32RegisterNumber), 4, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], psContext->psShader->sInfo.GetTextureDataType(psInst->asOperands[2].ui32RegisterNumber), 4, psInst->ui32PreciseMask, numParenthesis); glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); 
bcatcstr(glsl, ".read("); @@ -966,7 +910,7 @@ void ToMetal::GetResInfoData(Instruction* psInst, int index, int destElem) const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; psContext->AddIndentation(); - AddOpAssignToDestWithMask(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, "=", &numParenthesis, 1 << destElem); + AddOpAssignToDestWithMask(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, psInst->ui32PreciseMask, numParenthesis, 1 << destElem); const char *metalGetters[] = { ".get_width(", ".get_height(", ".get_depth(", ".get_num_mip_levels()" }; int dim = GetNumTextureDimensions(psInst->eResDim); @@ -1110,7 +1054,7 @@ void ToMetal::TranslateTextureSample(Instruction* psInst, SHADER_VARIABLE_TYPE dataType = psContext->psShader->sInfo.GetTextureDataType(psSrcTex->ui32RegisterNumber); psContext->AddIndentation(); - AddAssignToDest(psDest, dataType, psSrcTex->GetNumSwizzleElements(), &numParenthesis); + AddAssignToDest(psDest, dataType, psSrcTex->GetNumSwizzleElements(), psInst->ui32PreciseMask, numParenthesis); std::string texName = TranslateOperand(psSrcTex, TO_FLAG_NAME_ONLY); @@ -1129,9 +1073,9 @@ void ToMetal::TranslateTextureSample(Instruction* psInst, } // Sampler name - //TODO: Is it ok to use fixed shadow sampler in all cases of depth compare or would we need more - // accurate way of detecting shadow cases (atm all depth compares are interpreted as shadow usage) - if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + // on ios pre-GPUFamily3 we MUST have constexpr in shader for a sampler with compare func + // for now we use fixed shadow sampler in all cases of depth compare (ATM all depth compares are interpreted as shadow usage) + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE && IsMobileTarget(psContext)) { bcatcstr(glsl, "_mtl_xl_shadow_sampler"); } @@ -1417,7 +1361,7 @@ void ToMetal::TranslateShaderStorageLoad(Instruction* psInst) 
srcOffFlag = TO_FLAG_INTEGER; psContext->AddIndentation(); - AddAssignToDest(psDest, destDataType, destCount, &numParenthesis); + AddAssignToDest(psDest, destDataType, destCount, psInst->ui32PreciseMask, numParenthesis); if (destCount > 1) { bformata(glsl, "%s(", GetConstructorForTypeMetal(destDataType, destCount)); @@ -1492,17 +1436,16 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) Operand* compare = 0; int texDim = 0; bool isUint = true; - bool shouldAddFailMemoryOrder = false; - bool shouldExtractCompare = false; switch (psInst->eOpcode) { case OPCODE_IMM_ATOMIC_IADD: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IADD\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IADD\n"); + } func = "atomic_fetch_add_explicit"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1512,10 +1455,11 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_ATOMIC_IADD: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IADD\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IADD\n"); + } func = "atomic_fetch_add_explicit"; dest = &psInst->asOperands[0]; destAddr = &psInst->asOperands[1]; @@ -1524,10 +1468,11 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_IMM_ATOMIC_AND: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_AND\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_AND\n"); + } func = "atomic_fetch_and_explicit"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1537,10 +1482,11 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_ATOMIC_AND: { -#ifdef _DEBUG - 
psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_AND\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_AND\n"); + } func = "atomic_fetch_and_explicit"; dest = &psInst->asOperands[0]; destAddr = &psInst->asOperands[1]; @@ -1549,10 +1495,11 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_IMM_ATOMIC_OR: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_OR\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_OR\n"); + } func = "atomic_fetch_or_explicit"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1562,10 +1509,11 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_ATOMIC_OR: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_OR\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_OR\n"); + } func = "atomic_fetch_or_explicit"; dest = &psInst->asOperands[0]; destAddr = &psInst->asOperands[1]; @@ -1574,10 +1522,11 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_IMM_ATOMIC_XOR: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_XOR\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_XOR\n"); + } func = "atomic_fetch_xor_explicit"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1587,10 +1536,11 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_ATOMIC_XOR: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_XOR\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, 
"//ATOMIC_XOR\n"); + } func = "atomic_fetch_xor_explicit"; dest = &psInst->asOperands[0]; destAddr = &psInst->asOperands[1]; @@ -1600,10 +1550,11 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) case OPCODE_IMM_ATOMIC_EXCH: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_EXCH\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_EXCH\n"); + } func = "atomic_exchange_explicit"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1613,42 +1564,41 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_IMM_ATOMIC_CMP_EXCH: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_CMP_EXC\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_CMP_EXC\n"); + } func = "atomic_compare_exchange_weak_explicit"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; destAddr = &psInst->asOperands[2]; compare = &psInst->asOperands[3]; src = &psInst->asOperands[4]; - shouldAddFailMemoryOrder = true; - shouldExtractCompare = true; break; } case OPCODE_ATOMIC_CMP_STORE: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_CMP_STORE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_CMP_STORE\n"); + } func = "atomic_compare_exchange_weak_explicit"; previousValue = 0; dest = &psInst->asOperands[0]; destAddr = &psInst->asOperands[1]; compare = &psInst->asOperands[2]; src = &psInst->asOperands[3]; - shouldAddFailMemoryOrder = true; - shouldExtractCompare = true; break; } case OPCODE_IMM_ATOMIC_UMIN: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_UMIN\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + 
psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_UMIN\n"); + } func = "atomic_fetch_min_explicit"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1658,10 +1608,11 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_ATOMIC_UMIN: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_UMIN\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_UMIN\n"); + } func = "atomic_fetch_min_explicit"; dest = &psInst->asOperands[0]; destAddr = &psInst->asOperands[1]; @@ -1670,10 +1621,11 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_IMM_ATOMIC_IMIN: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IMIN\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IMIN\n"); + } func = "atomic_fetch_min_explicit"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1683,10 +1635,11 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_ATOMIC_IMIN: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IMIN\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IMIN\n"); + } func = "atomic_fetch_min_explicit"; dest = &psInst->asOperands[0]; destAddr = &psInst->asOperands[1]; @@ -1695,10 +1648,11 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_IMM_ATOMIC_UMAX: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_UMAX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_UMAX\n"); + } func = "atomic_fetch_max_explicit"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; 
@@ -1708,10 +1662,11 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_ATOMIC_UMAX: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_UMAX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_UMAX\n"); + } func = "atomic_fetch_max_explicit"; dest = &psInst->asOperands[0]; destAddr = &psInst->asOperands[1]; @@ -1720,10 +1675,11 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_IMM_ATOMIC_IMAX: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IMAX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IMAX\n"); + } func = "atomic_fetch_max_explicit"; previousValue = &psInst->asOperands[0]; dest = &psInst->asOperands[1]; @@ -1733,10 +1689,11 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) } case OPCODE_ATOMIC_IMAX: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IMAX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IMAX\n"); + } func = "atomic_fetch_max_explicit"; dest = &psInst->asOperands[0]; destAddr = &psInst->asOperands[1]; @@ -1797,7 +1754,7 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) else ui32DataTypeFlag = TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT; - if (shouldExtractCompare) + if (compare) { bcatcstr(glsl, "{\n"); ++psContext->indent; @@ -1808,7 +1765,7 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) psContext->AddIndentation(); } else if (previousValue) - AddAssignToDest(previousValue, isUint ? SVT_UINT : SVT_INT, 1, &numParenthesis); + AddAssignToDest(previousValue, isUint ? 
SVT_UINT : SVT_INT, 1, psInst->ui32PreciseMask, numParenthesis); bcatcstr(glsl, func); bcatcstr(glsl, "("); @@ -1843,21 +1800,11 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) bcatcstr(glsl, "]), "); if (compare) - { - if (shouldExtractCompare) - { - bcatcstr(glsl, "&compare_value, "); - } - else - { - glsl << TranslateOperand(compare, ui32DataTypeFlag); - bcatcstr(glsl, ", "); - } - } + bcatcstr(glsl, "&compare_value, "); glsl << TranslateOperand(src, ui32DataTypeFlag); bcatcstr(glsl, ", memory_order::memory_order_relaxed"); - if (shouldAddFailMemoryOrder) + if (compare) bcatcstr(glsl, ", memory_order::memory_order_relaxed"); bcatcstr(glsl, ")"); if (previousValue) @@ -1867,12 +1814,12 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst) else bcatcstr(glsl, ";\n"); - if (shouldExtractCompare) + if (compare) { if (previousValue) { psContext->AddIndentation(); - AddAssignToDest(previousValue, SVT_UINT, 1, &numParenthesis); + AddAssignToDest(previousValue, SVT_UINT, 1, psInst->ui32PreciseMask, numParenthesis); bcatcstr(glsl, "compare_value"); AddAssignPrologue(numParenthesis); } @@ -1960,17 +1907,18 @@ void ToMetal::TranslateInstruction(Instruction* psInst) bstring glsl = *psContext->currentGLSLString; int numParenthesis = 0; -#ifdef _DEBUG - // Uncomment to print instruction IDs - //psContext->AddIndentation(); - //bformata(glsl, "//Instruction %d\n", psInst->id); -#if 0 - if (psInst->id == 73) + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) { - ASSERT(1); //Set breakpoint here to debug an instruction from its ID. + // Uncomment to print instruction IDs + //psContext->AddIndentation(); + //bformata(glsl, "//Instruction %d\n", psInst->id); + #if 0 + if (psInst->id == 73) + { + ASSERT(1); //Set breakpoint here to debug an instruction from its ID. 
+ } + #endif } -#endif -#endif switch (psInst->eOpcode) { @@ -1980,13 +1928,14 @@ void ToMetal::TranslateInstruction(Instruction* psInst) uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); SHADER_VARIABLE_TYPE castType = psInst->eOpcode == OPCODE_FTOU ? SVT_UINT : SVT_INT; -#ifdef _DEBUG - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_FTOU) - bcatcstr(glsl, "//FTOU\n"); - else - bcatcstr(glsl, "//FTOI\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_FTOU) + bcatcstr(glsl, "//FTOU\n"); + else + bcatcstr(glsl, "//FTOI\n"); + } switch (psInst->asOperands[0].eMinPrecision) { case OPERAND_MIN_PRECISION_DEFAULT: @@ -2004,7 +1953,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], castType, srcCount, psInst->ui32PreciseMask, numParenthesis); bcatcstr(glsl, GetConstructorForTypeMetal(castType, dstCount)); bcatcstr(glsl, "("); // 1 glsl << TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT, psInst->asOperands[0].GetAccessMask()); @@ -2015,12 +1964,44 @@ void ToMetal::TranslateInstruction(Instruction* psInst) case OPCODE_MOV: { -#ifdef _DEBUG + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MOV\n"); + } psContext->AddIndentation(); - bcatcstr(glsl, "//MOV\n"); -#endif - psContext->AddIndentation(); - AddMOVBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1]); + + // UNITY SPECIFIC: you can check case 1158280 + // This looks like a hack because it is! There is a bug that is quite hard to reproduce. 
+ // When doing data analysis we assume that immediates are ints and hope it will be promoted later + // which is kinda fine unless there is an unfortunate combination happening: + // We operate on 4-component registers - we need different components to be treated as float/int + // but we should not use float operations (as this will mark register as float) + // instead "float" components should be used for MOV and friends to other registers + // and they, in turn, should be used for float ops + // In pseudocode it can look like this: + // var2.xy = var1.xy; var1.xy = var2.xy; // not marked as float explicitly + // bool foo = var1.z | <...> // marked as int + // Now we have immediate that will be treated as int but NOT promoted because we think we have all ints + // var1.w = 1 // var1 is marked int + // What is important is that this temporary is marked as int by us but DX compiler treats it + // as "normal" float (and rightfully so) [or rather - we speak about cases where it does treat it as float] + // It is also important that we speak about temps (otherwise we have explicit data type to use, so promotion works) + // + // At this point we have mov immediate to int temp (which should really be float temp) + { + Operand *pDst = &psInst->asOperands[0], *pSrc = &psInst->asOperands[1]; + if (pDst->GetDataType(psContext) == SVT_INT // dst marked as int + && pDst->eType == OPERAND_TYPE_TEMP // dst is temp + && pSrc->eType == OPERAND_TYPE_IMMEDIATE32 // src is immediate + && psContext->psShader->psIntTempSizes[pDst->ui32RegisterNumber] == 0 // no temp register allocated + ) + { + pDst->aeDataType[0] = pDst->aeDataType[1] = pDst->aeDataType[2] = pDst->aeDataType[3] = SVT_FLOAT; + } + } + + AddMOVBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], psInst->ui32PreciseMask); break; } case OPCODE_ITOF://signed to float @@ -2030,17 +2011,14 @@ void ToMetal::TranslateInstruction(Instruction* psInst) uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); 
uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); -#ifdef _DEBUG - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_ITOF) + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) { - bcatcstr(glsl, "//ITOF\n"); + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_ITOF) + bcatcstr(glsl, "//ITOF\n"); + else + bcatcstr(glsl, "//UTOF\n"); } - else - { - bcatcstr(glsl, "//UTOF\n"); - } -#endif switch (psInst->asOperands[0].eMinPrecision) { @@ -2057,7 +2035,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], castType, srcCount, psInst->ui32PreciseMask, numParenthesis); bcatcstr(glsl, GetConstructorForTypeMetal(castType, dstCount)); bcatcstr(glsl, "("); // 1 glsl << TranslateOperand(&psInst->asOperands[1], psInst->eOpcode == OPCODE_UTOF ? TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT, psInst->asOperands[0].GetAccessMask()); @@ -2067,20 +2045,22 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_MAD: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MAD\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MAD\n"); + } CallHelper3("fma", psInst, 0, 1, 2, 3, 1); break; } case OPCODE_IMAD: { uint32_t ui32Flags = TO_FLAG_INTEGER; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMAD\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMAD\n"); + } if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) { @@ -2093,29 +2073,32 @@ void ToMetal::TranslateInstruction(Instruction* psInst) case OPCODE_DFMA: { uint32_t ui32Flags = TO_FLAG_DOUBLE; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DFMA\n"); -#endif + if 
(psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DFMA\n"); + } CallHelper3("fma", psInst, 0, 1, 2, 3, 1, ui32Flags); break; } case OPCODE_DADD: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DADD\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DADD\n"); + } CallBinaryOp("+", psInst, 0, 1, 2, SVT_DOUBLE); break; } case OPCODE_IADD: { SHADER_VARIABLE_TYPE eType = SVT_INT; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IADD\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IADD\n"); + } //Is this a signed or unsigned add? if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) { @@ -2126,27 +2109,29 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_ADD: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ADD\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ADD\n"); + } CallBinaryOp("+", psInst, 0, 1, 2, SVT_FLOAT); break; } case OPCODE_OR: { /*Todo: vector version */ -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//OR\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//OR\n"); + } if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) { uint32_t destMask = psInst->asOperands[0].GetAccessMask(); int needsParenthesis = 0; psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), &needsParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, needsParenthesis); glsl << TranslateOperand(&psInst->asOperands[1], 
TO_FLAG_BOOL, destMask); bcatcstr(glsl, " || "); glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); @@ -2160,19 +2145,20 @@ void ToMetal::TranslateInstruction(Instruction* psInst) { SHADER_VARIABLE_TYPE eA = psInst->asOperands[1].GetDataType(psContext); SHADER_VARIABLE_TYPE eB = psInst->asOperands[2].GetDataType(psContext); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//AND\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//AND\n"); + } uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); SHADER_VARIABLE_TYPE eDataType = psInst->asOperands[0].GetDataType(psContext); uint32_t ui32Flags = SVTTypeToFlag(eDataType); if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) { int needsParenthesis = 0; psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), &needsParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, needsParenthesis); glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); bcatcstr(glsl, " && "); glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); @@ -2188,7 +2174,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst) if (dstSwizCount == 1) { - AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, &needsParenthesis); + AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, psInst->ui32PreciseMask, needsParenthesis); glsl << TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); bcatcstr(glsl, " ? 
"); glsl << TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); @@ -2196,28 +2182,23 @@ void ToMetal::TranslateInstruction(Instruction* psInst) bcatcstr(glsl, GetConstructorForTypeMetal(eDataType, dstSwizCount)); bcatcstr(glsl, "("); - for (i = 0; i < dstSwizCount; i++) + switch (eDataType) { - if (i > 0) - bcatcstr(glsl, ", "); - switch (eDataType) - { - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - case SVT_DOUBLE: - bcatcstr(glsl, "0.0"); - break; - default: - bcatcstr(glsl, "0"); - } + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + case SVT_DOUBLE: + bcatcstr(glsl, "0.0"); + break; + default: + bcatcstr(glsl, "0"); } bcatcstr(glsl, ")"); } else if (eDataType == SVT_FLOAT) { // We can use select() - AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, &needsParenthesis); + AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, psInst->ui32PreciseMask, needsParenthesis); bcatcstr(glsl, "select("); bcatcstr(glsl, GetConstructorForTypeMetal(eDataType, dstSwizCount)); bcatcstr(glsl, "("); @@ -2225,17 +2206,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst) { if (i > 0) bcatcstr(glsl, ", "); - switch (eDataType) - { - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - case SVT_DOUBLE: - bcatcstr(glsl, "0.0"); - break; - default: - bcatcstr(glsl, "0"); - } + bcatcstr(glsl, "0.0"); } bcatcstr(glsl, "), "); glsl << TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); @@ -2248,7 +2219,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } else { - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, dstSwizCount, &needsParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, dstSwizCount, psInst->ui32PreciseMask, needsParenthesis); bcatcstr(glsl, "("); bcatcstr(glsl, GetConstructorForTypeMetal(SVT_UINT, dstSwizCount)); bcatcstr(glsl, "("); @@ -2273,29 +2244,32 @@ void ToMetal::TranslateInstruction(Instruction* psInst) dest = vec4(greaterThanEqual(vec4(srcA), 
vec4(srcB)); Caveat: The result is a boolean but HLSL asm returns 0xFFFFFFFF/0x0 instead. */ -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GE\n"); + } AddComparison(psInst, CMP_GE, TO_FLAG_NONE); break; } case OPCODE_MUL: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MUL\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MUL\n"); + } CallBinaryOp("*", psInst, 0, 1, 2, SVT_FLOAT); break; } case OPCODE_IMUL: { SHADER_VARIABLE_TYPE eType = SVT_INT; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMUL\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMUL\n"); + } if (psInst->asOperands[1].GetDataType(psContext) == SVT_UINT) { eType = SVT_UINT; @@ -2308,10 +2282,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_UDIV: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//UDIV\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//UDIV\n"); + } //destQuotient, destRemainder, src0, src1 // There are cases where destQuotient is the same variable as src0 or src1. 
If that happens, @@ -2331,19 +2306,21 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_DIV: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DIV\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DIV\n"); + } CallBinaryOp("/", psInst, 0, 1, 2, SVT_FLOAT); break; } case OPCODE_SINCOS: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SINCOS\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SINCOS\n"); + } // Need careful ordering if src == dest[0], as then the cos() will be reading from wrong value if (psInst->asOperands[0].eType == psInst->asOperands[2].eType && psInst->asOperands[0].ui32RegisterNumber == psInst->asOperands[2].ui32RegisterNumber) @@ -2379,10 +2356,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) case OPCODE_DP2: { int numParenthesis = 0; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DP2\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DP2\n"); + } psContext->AddIndentation(); SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2; @@ -2393,7 +2371,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst) if (dstType != SVT_FLOAT16) dstType = SVT_FLOAT; - AddAssignToDest(&psInst->asOperands[0], dstType, 1, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], dstType, 1, psInst->ui32PreciseMask, numParenthesis); bcatcstr(glsl, "dot("); glsl << TranslateOperand(&psInst->asOperands[1], typeFlags, 3 /* .xy */); bcatcstr(glsl, ", "); @@ -2405,10 +2383,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) case OPCODE_DP3: { int numParenthesis = 0; -#ifdef _DEBUG - psContext->AddIndentation(); - 
bcatcstr(glsl, "//DP3\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DP3\n"); + } psContext->AddIndentation(); SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3; @@ -2419,7 +2398,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst) if (dstType != SVT_FLOAT16) dstType = SVT_FLOAT; - AddAssignToDest(&psInst->asOperands[0], dstType, 1, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], dstType, 1, psInst->ui32PreciseMask, numParenthesis); bcatcstr(glsl, "dot("); glsl << TranslateOperand(&psInst->asOperands[1], typeFlags, 7 /* .xyz */); bcatcstr(glsl, ", "); @@ -2430,353 +2409,390 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_DP4: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DP4\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DP4\n"); + } CallHelper2("dot", psInst, 0, 1, 2, 0); break; } case OPCODE_INE: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//INE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//INE\n"); + } AddComparison(psInst, CMP_NE, TO_FLAG_INTEGER); break; } case OPCODE_NE: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//NE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//NE\n"); + } AddComparison(psInst, CMP_NE, TO_FLAG_NONE); break; } case OPCODE_IGE: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IGE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IGE\n"); + } AddComparison(psInst, CMP_GE, 
TO_FLAG_INTEGER); break; } case OPCODE_ILT: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ILT\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ILT\n"); + } AddComparison(psInst, CMP_LT, TO_FLAG_INTEGER); break; } case OPCODE_LT: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LT\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LT\n"); + } AddComparison(psInst, CMP_LT, TO_FLAG_NONE); break; } case OPCODE_IEQ: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IEQ\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IEQ\n"); + } AddComparison(psInst, CMP_EQ, TO_FLAG_INTEGER); break; } case OPCODE_ULT: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ULT\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ULT\n"); + } AddComparison(psInst, CMP_LT, TO_FLAG_UNSIGNED_INTEGER); break; } case OPCODE_UGE: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//UGE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//UGE\n"); + } AddComparison(psInst, CMP_GE, TO_FLAG_UNSIGNED_INTEGER); break; } case OPCODE_MOVC: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MOVC\n"); -#endif - AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3]); + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MOVC\n"); + } + AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], 
psInst->ui32PreciseMask); break; } case OPCODE_SWAPC: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SWAPC\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SWAPC\n"); + } // TODO needs temps!! ASSERT(0); - AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[2], &psInst->asOperands[4], &psInst->asOperands[3]); - AddMOVCBinaryOp(&psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], &psInst->asOperands[4]); + AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[2], &psInst->asOperands[4], &psInst->asOperands[3], psInst->ui32PreciseMask); + AddMOVCBinaryOp(&psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], &psInst->asOperands[4], psInst->ui32PreciseMask); break; } case OPCODE_LOG: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LOG\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LOG\n"); + } CallHelper1("log2", psInst, 0, 1, 1); break; } case OPCODE_RSQ: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RSQ\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RSQ\n"); + } CallHelper1("rsqrt", psInst, 0, 1, 1); break; } case OPCODE_EXP: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EXP\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EXP\n"); + } CallHelper1("exp2", psInst, 0, 1, 1); break; } case OPCODE_SQRT: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SQRT\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SQRT\n"); + } CallHelper1("sqrt", psInst, 0, 1, 1); break; } case OPCODE_ROUND_PI: { 
-#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_PI\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_PI\n"); + } CallHelper1("ceil", psInst, 0, 1, 1); break; } case OPCODE_ROUND_NI: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_NI\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_NI\n"); + } CallHelper1("floor", psInst, 0, 1, 1); break; } case OPCODE_ROUND_Z: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_Z\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_Z\n"); + } CallHelper1("trunc", psInst, 0, 1, 1); break; } case OPCODE_ROUND_NE: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_NE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_NE\n"); + } CallHelper1("rint", psInst, 0, 1, 1); break; } case OPCODE_FRC: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//FRC\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//FRC\n"); + } CallHelper1("fract", psInst, 0, 1, 1); break; } case OPCODE_IMAX: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMAX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMAX\n"); + } CallHelper2Int("max", psInst, 0, 1, 2, 1); break; } case OPCODE_UMAX: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//UMAX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//UMAX\n"); + } 
CallHelper2UInt("max", psInst, 0, 1, 2, 1); break; } case OPCODE_MAX: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MAX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MAX\n"); + } CallHelper2("max", psInst, 0, 1, 2, 1); break; } case OPCODE_IMIN: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMIN\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMIN\n"); + } CallHelper2Int("min", psInst, 0, 1, 2, 1); break; } case OPCODE_UMIN: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//UMIN\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//UMIN\n"); + } CallHelper2UInt("min", psInst, 0, 1, 2, 1); break; } case OPCODE_MIN: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MIN\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MIN\n"); + } CallHelper2("min", psInst, 0, 1, 2, 1); break; } case OPCODE_GATHER4: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER); break; } case OPCODE_GATHER4_PO_C: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4_PO_C\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_PO_C\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET | TEXSMP_FLAG_DEPTHCOMPARE); break; } case OPCODE_GATHER4_PO: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, 
"//GATHER4_PO\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_PO\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET); break; } case OPCODE_GATHER4_C: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4_C\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_C\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_DEPTHCOMPARE); break; } case OPCODE_SAMPLE: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_NONE); break; } case OPCODE_SAMPLE_L: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_L\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_L\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_LOD); break; } case OPCODE_SAMPLE_C: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_C\n"); -#endif - + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_C\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE); break; } case OPCODE_SAMPLE_C_LZ: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_C_LZ\n"); -#endif - + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_C_LZ\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE | TEXSMP_FLAG_FIRSTLOD); break; } case OPCODE_SAMPLE_D: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_D\n"); 
-#endif - + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_D\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_GRAD); break; } case OPCODE_SAMPLE_B: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_B\n"); -#endif - + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_B\n"); + } TranslateTextureSample(psInst, TEXSMP_FLAG_BIAS); break; } case OPCODE_RET: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RET\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RET\n"); + } if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Post shader code ---\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); + } + bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End post shader code ---\n"); -#endif + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); + } } psContext->AddIndentation(); if (psContext->psShader->eShaderType == COMPUTE_SHADER || (psContext->psShader->eShaderType == PIXEL_SHADER && m_StructDefinitions[GetOutputStructName()].m_Members.size() == 0)) @@ -2796,23 +2812,41 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_COUNTBITS: { -#ifdef _DEBUG + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//COUNTBITS\n"); + } psContext->AddIndentation(); - bcatcstr(glsl, "//COUNTBITS\n"); 
-#endif - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = popCount("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + + // in metal popcount decl is T popcount(T), so it is important that input/output types agree + // enter assembly: when writing swizzle encoding we use 0 to say "source from x" + // now, say, we generate code o.xy = bitcount(i.xy) + // output gets component mask 1,1,0,0 (note that we use bit 1<).<..> will still collapse everything into + // popcount(i.<..>) [well, tweaking swizzle, sure] + // what does that mean is that we can safely take output component count to determine "proper" type + // note that hlsl compiler already checked that things can work out, so it should be fine doing this magic + + const Operand* dst = &psInst->asOperands[0]; + const int dstCompCount = dst->eSelMode == OPERAND_4_COMPONENT_MASK_MODE ? dst->ui32CompMask : OPERAND_4_COMPONENT_MASK_ALL; + + glsl << TranslateOperand(dst, TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = popcount("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, dstCompCount); bcatcstr(glsl, ");\n"); break; } case OPCODE_FIRSTBIT_HI: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_HI\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_HI\n"); + } DeclareExtraFunction("firstBit_hi", "template UVecType firstBit_hi(const UVecType input) { UVecType res = clz(input); return res; };"); // TODO implement the 0-case (must return 0xffffffff) psContext->AddIndentation(); @@ -2824,10 +2858,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_FIRSTBIT_LO: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_LO\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + 
psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_LO\n"); + } // TODO implement the 0-case (must return 0xffffffff) DeclareExtraFunction("firstBit_lo", "template UVecType firstBit_lo(const UVecType input) { UVecType res = ctz(input); return res; };"); psContext->AddIndentation(); @@ -2839,10 +2874,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_FIRSTBIT_SHI: //signed high { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_SHI\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_SHI\n"); + } // TODO Not at all correct for negative values yet. DeclareExtraFunction("firstBit_shi", "template IVecType firstBit_shi(const IVecType input) { IVecType res = clz(input); return res; };"); psContext->AddIndentation(); @@ -2854,10 +2890,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_BFREV: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BFREV\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BFREV\n"); + } DeclareExtraFunction("bitReverse", "template UVecType bitReverse(const UVecType input)\n\ \t\t{ UVecType x = input;\n\ \t\t\tx = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1));\n\ @@ -2875,10 +2912,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_BFI: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BFI\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BFI\n"); + } DeclareExtraFunction("BFI", "\ \t\ttemplate UVecType bitFieldInsert(const UVecType width, const UVecType offset, const UVecType src2, const UVecType src3)\n\ \t\t{\n\ @@ -2888,7 +2926,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst) psContext->AddIndentation(); uint32_t destMask = 
psInst->asOperands[0].GetAccessMask(); - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, psInst->asOperands[0].GetNumSwizzleElements(), &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, numParenthesis); bcatcstr(glsl, "bitFieldInsert("); glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, destMask); bcatcstr(glsl, ", "); @@ -2918,10 +2956,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_LOOP: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LOOP\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LOOP\n"); + } psContext->AddIndentation(); bcatcstr(glsl, "while(true){\n"); @@ -2931,30 +2970,33 @@ void ToMetal::TranslateInstruction(Instruction* psInst) case OPCODE_ENDLOOP: { --psContext->indent; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ENDLOOP\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDLOOP\n"); + } psContext->AddIndentation(); bcatcstr(glsl, "}\n"); break; } case OPCODE_BREAK: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BREAK\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BREAK\n"); + } psContext->AddIndentation(); bcatcstr(glsl, "break;\n"); break; } case OPCODE_BREAKC: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BREAKC\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BREAKC\n"); + } psContext->AddIndentation(); TranslateConditional(psInst, glsl); @@ -2962,10 +3004,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_CONTINUEC: { -#ifdef _DEBUG - 
psContext->AddIndentation(); - bcatcstr(glsl, "//CONTINUEC\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//CONTINUEC\n"); + } psContext->AddIndentation(); TranslateConditional(psInst, glsl); @@ -2973,10 +3016,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_IF: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IF\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IF\n"); + } psContext->AddIndentation(); TranslateConditional(psInst, glsl); @@ -2985,10 +3029,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_RETC: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RETC\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RETC\n"); + } psContext->AddIndentation(); TranslateConditional(psInst, glsl); @@ -2997,10 +3042,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) case OPCODE_ELSE: { --psContext->indent; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ELSE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ELSE\n"); + } psContext->AddIndentation(); bcatcstr(glsl, "} else {\n"); psContext->indent++; @@ -3010,10 +3056,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) case OPCODE_ENDIF: { --psContext->indent; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ENDIF\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDIF\n"); + } psContext->AddIndentation(); bcatcstr(glsl, "}\n"); break; @@ -3040,10 +3087,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) { const uint32_t ui32SyncFlags = 
psInst->ui32SyncFlags; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SYNC\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SYNC\n"); + } const bool sync_threadgroup = (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY) != 0; const bool sync_device = (ui32SyncFlags & (SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP | SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL)) != 0; @@ -3070,10 +3118,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_SWITCH: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SWITCH\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SWITCH\n"); + } psContext->AddIndentation(); bcatcstr(glsl, "switch(int("); glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); @@ -3085,10 +3134,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) case OPCODE_CASE: { --psContext->indent; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//case\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//case\n"); + } psContext->AddIndentation(); bcatcstr(glsl, "case "); @@ -3100,19 +3150,21 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_EQ: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EQ\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EQ\n"); + } AddComparison(psInst, CMP_EQ, TO_FLAG_NONE); break; } case OPCODE_USHR: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//USHR\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//USHR\n"); + } CallBinaryOp(">>", psInst, 0, 1, 2, SVT_UINT); break; } @@ -3120,10 
+3172,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) { SHADER_VARIABLE_TYPE eType = SVT_INT; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ISHL\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ISHL\n"); + } if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) { @@ -3136,10 +3189,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) case OPCODE_ISHR: { SHADER_VARIABLE_TYPE eType = SVT_INT; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ISHR\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ISHR\n"); + } if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) { @@ -3153,13 +3207,14 @@ void ToMetal::TranslateInstruction(Instruction* psInst) case OPCODE_LD_MS: { const ResourceBinding* psBinding = 0; -#ifdef _DEBUG - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_LD) - bcatcstr(glsl, "//LD\n"); - else - bcatcstr(glsl, "//LD_MS\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_LD) + bcatcstr(glsl, "//LD\n"); + else + bcatcstr(glsl, "//LD_MS\n"); + } psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, &psBinding); @@ -3185,10 +3240,12 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_DISCARD: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DISCARD\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DISCARD\n"); + } + psContext->AddIndentation(); if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) { @@ -3207,14 +3264,15 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_LOD: { -#ifdef _DEBUG - 
psContext->AddIndentation(); - bcatcstr(glsl, "//LOD\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LOD\n"); + } //LOD computes the following vector (ClampedLOD, NonClampedLOD, 0, 0) psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 4, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 4, psInst->ui32PreciseMask, numParenthesis); //If the core language does not have query-lod feature, //then the extension is used. The name of the function @@ -3246,10 +3304,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_EVAL_CENTROID: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_CENTROID\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_CENTROID\n"); + } psContext->AddIndentation(); glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); bcatcstr(glsl, " = interpolateAtCentroid("); @@ -3262,10 +3321,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_EVAL_SAMPLE_INDEX: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_SAMPLE_INDEX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_SAMPLE_INDEX\n"); + } psContext->AddIndentation(); glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); bcatcstr(glsl, " = interpolateAtSample("); @@ -3280,10 +3340,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_EVAL_SNAPPED: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_SNAPPED\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_SNAPPED\n"); + } psContext->AddIndentation(); glsl << 
TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); bcatcstr(glsl, " = interpolateAtOffset("); @@ -3298,19 +3359,21 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_LD_STRUCTURED: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_STRUCTURED\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_STRUCTURED\n"); + } TranslateShaderStorageLoad(psInst); break; } case OPCODE_LD_UAV_TYPED: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_UAV_TYPED\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_UAV_TYPED\n"); + } Operand* psDest = &psInst->asOperands[0]; Operand* psSrc = &psInst->asOperands[2]; Operand* psSrcAddr = &psInst->asOperands[1]; @@ -3356,7 +3419,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst) int srcCount = psSrc->GetNumSwizzleElements(), numParenthesis = 0; psContext->AddIndentation(); - AddAssignToDest(psDest, srcDataType, srcCount, &numParenthesis); + AddAssignToDest(psDest, srcDataType, srcCount, psInst->ui32PreciseMask, numParenthesis); glsl << TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); bcatcstr(glsl, ".read("); glsl << TranslateOperand(psSrcAddr, flags, opMask); @@ -3375,7 +3438,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst) glsl << TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER, opMask); } bcatcstr(glsl, ")"); - glsl << TranslateOperandSwizzle(&psInst->asOperands[0], OPERAND_4_COMPONENT_MASK_ALL, 0); + glsl << TranslateOperandSwizzle(psSrc, psDest->ui32CompMask, 0); AddAssignPrologue(numParenthesis); #undef RRD @@ -3384,19 +3447,21 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_STORE_RAW: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_RAW\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + 
{ + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_RAW\n"); + } TranslateShaderStorageStore(psInst); break; } case OPCODE_STORE_STRUCTURED: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_STRUCTURED\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_STRUCTURED\n"); + } TranslateShaderStorageStore(psInst); break; } @@ -3406,10 +3471,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) const ResourceBinding* psRes; int foundResource; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_UAV_TYPED\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_UAV_TYPED\n"); + } foundResource = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psInst->asOperands[0].ui32RegisterNumber, &psRes); @@ -3482,11 +3548,11 @@ void ToMetal::TranslateInstruction(Instruction* psInst) } case OPCODE_LD_RAW: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_RAW\n"); -#endif - + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_RAW\n"); + } TranslateShaderStorageLoad(psInst); break; } @@ -3517,13 +3583,14 @@ void ToMetal::TranslateInstruction(Instruction* psInst) case OPCODE_UBFE: case OPCODE_IBFE: { -#ifdef _DEBUG - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_UBFE) - bcatcstr(glsl, "//OPCODE_UBFE\n"); - else - bcatcstr(glsl, "//OPCODE_IBFE\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_UBFE) + bcatcstr(glsl, "//OPCODE_UBFE\n"); + else + bcatcstr(glsl, "//OPCODE_IBFE\n"); + } bool isUBFE = psInst->eOpcode == OPCODE_UBFE; bool isScalar = psInst->asOperands[0].GetNumSwizzleElements() == 1; @@ -3605,7 +3672,7 @@ template vec 
bitFieldExtractI(const vec width, const ve ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); } - AddAssignToDest(&psInst->asOperands[0], isUBFE ? SVT_UINT : SVT_INT, psInst->asOperands[0].GetNumSwizzleElements(), &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], isUBFE ? SVT_UINT : SVT_INT, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, numParenthesis); bcatcstr(glsl, "bitFieldExtract"); bcatcstr(glsl, isUBFE ? "U" : "I"); bcatcstr(glsl, "("); @@ -3623,10 +3690,11 @@ template vec bitFieldExtractI(const vec width, const ve const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); const uint32_t srcElemCount = psInst->asOperands[1].GetNumSwizzleElements(); int numParenthesis = 0; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RCP\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RCP\n"); + } psContext->AddIndentation(); SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); @@ -3640,7 +3708,7 @@ template vec bitFieldExtractI(const vec width, const ve else srcType = SVT_FLOAT; - AddAssignToDest(&psInst->asOperands[0], srcType, srcElemCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], srcType, srcElemCount, psInst->ui32PreciseMask, numParenthesis); bcatcstr(glsl, GetConstructorForTypeMetal(srcType, destElemCount)); bcatcstr(glsl, "(1.0) / "); bcatcstr(glsl, GetConstructorForTypeMetal(srcType, destElemCount)); @@ -3654,10 +3722,11 @@ template vec bitFieldExtractI(const vec width, const ve { uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//F32TOF16\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//F32TOF16\n"); + } for (int i = 0; i < 4; i++) { @@ -3666,7 +3735,7 @@ template vec bitFieldExtractI(const vec 
width, const ve psContext->AddIndentation(); psInst->asOperands[0].ui32CompMask = (1 << i); psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, psInst->ui32PreciseMask, numParenthesis); bcatcstr(glsl, "as_type(half2("); glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, (1 << i)); @@ -3679,10 +3748,11 @@ template vec bitFieldExtractI(const vec width, const ve { uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//F16TOF32\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//F16TOF32\n"); + } for (int i = 0; i < 4; i++) { @@ -3691,7 +3761,7 @@ template vec bitFieldExtractI(const vec width, const ve psContext->AddIndentation(); psInst->asOperands[0].ui32CompMask = (1 << i); psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, psInst->ui32PreciseMask, numParenthesis); bcatcstr(glsl, "as_type("); glsl << TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_UINT, (1 << i)); @@ -3703,14 +3773,15 @@ template vec bitFieldExtractI(const vec width, const ve case OPCODE_INEG: { int numParenthesis = 0; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//INEG\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//INEG\n"); + } //dest = 0 - src0 psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), psInst->ui32PreciseMask, numParenthesis); 
bcatcstr(glsl, "0 - "); glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); @@ -3721,10 +3792,11 @@ template vec bitFieldExtractI(const vec width, const ve case OPCODE_DERIV_RTX_FINE: case OPCODE_DERIV_RTX: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DERIV_RTX\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DERIV_RTX\n"); + } CallHelper1("dfdx", psInst, 0, 1, 1); break; } @@ -3732,28 +3804,31 @@ template vec bitFieldExtractI(const vec width, const ve case OPCODE_DERIV_RTY_FINE: case OPCODE_DERIV_RTY: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DERIV_RTY\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DERIV_RTY\n"); + } CallHelper1("dfdy", psInst, 0, 1, 1); break; } case OPCODE_LRP: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LRP\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LRP\n"); + } CallHelper3("mix", psInst, 0, 2, 3, 1, 1); break; } case OPCODE_DP2ADD: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DP2ADD\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DP2ADD\n"); + } psContext->AddIndentation(); bool isFP16 = false; if (CanForceToHalfOperand(&psInst->asOperands[0]) @@ -3762,7 +3837,7 @@ template vec bitFieldExtractI(const vec width, const ve && CanForceToHalfOperand(&psInst->asOperands[2])) isFP16 = true; int parenthesis = 0; - AddAssignToDest(&psInst->asOperands[0], isFP16 ? SVT_FLOAT16 : SVT_FLOAT, 2, &parenthesis); + AddAssignToDest(&psInst->asOperands[0], isFP16 ? 
SVT_FLOAT16 : SVT_FLOAT, 2, psInst->ui32PreciseMask, parenthesis); uint32_t flags = TO_AUTO_EXPAND_TO_VEC2; flags |= isFP16 ? TO_FLAG_FORCE_HALF : TO_AUTO_BITCAST_TO_FLOAT; @@ -3779,10 +3854,11 @@ template vec bitFieldExtractI(const vec width, const ve case OPCODE_POW: { // TODO Check POW opcode whether it actually needs the abs -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//POW\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//POW\n"); + } psContext->AddIndentation(); glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); bcatcstr(glsl, " = powr(abs("); @@ -3795,12 +3871,13 @@ template vec bitFieldExtractI(const vec width, const ve case OPCODE_IMM_ATOMIC_ALLOC: { -#ifdef _DEBUG + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_ALLOC\n"); + } psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_ALLOC\n"); -#endif - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, psInst->ui32PreciseMask, numParenthesis); bcatcstr(glsl, "atomic_fetch_add_explicit("); glsl << ResourceName(RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber); bcatcstr(glsl, "_counter, 1, memory_order::memory_order_relaxed)"); @@ -3809,12 +3886,13 @@ template vec bitFieldExtractI(const vec width, const ve } case OPCODE_IMM_ATOMIC_CONSUME: { -#ifdef _DEBUG + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_CONSUME\n"); + } psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_CONSUME\n"); -#endif - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, psInst->ui32PreciseMask, numParenthesis); 
bcatcstr(glsl, "atomic_fetch_sub_explicit("); glsl << ResourceName(RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber); // Metal atomic sub returns previous value. Therefore minus one here to get the correct data index. @@ -3825,40 +3903,43 @@ template vec bitFieldExtractI(const vec width, const ve case OPCODE_NOT: { -#ifdef _DEBUG + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//NOT\n"); + } psContext->AddIndentation(); - bcatcstr(glsl, "//NOT\n"); -#endif - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), psInst->ui32PreciseMask, numParenthesis); - bcatcstr(glsl, "~"); + bcatcstr(glsl, "~("); + numParenthesis++; glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); AddAssignPrologue(numParenthesis); break; } case OPCODE_XOR: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//XOR\n"); -#endif - + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//XOR\n"); + } CallBinaryOp("^", psInst, 0, 1, 2, SVT_UINT); break; } case OPCODE_RESINFO: { - uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); - uint32_t destElem; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RESINFO\n"); -#endif - - for (destElem = 0; destElem < destElemCount; ++destElem) + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) { - GetResInfoData(psInst, psInst->asOperands[2].aui32Swizzle[destElem], destElem); + psContext->AddIndentation(); + bcatcstr(glsl, "//RESINFO\n"); + } + + const uint32_t mask = psInst->asOperands[0].GetAccessMask(); + for (int i = 0; i < 4; ++i) + { + if ((1 << i) & mask) + GetResInfoData(psInst, 
psInst->asOperands[2].aui32Swizzle[i], i); } break; @@ -3866,23 +3947,25 @@ template vec bitFieldExtractI(const vec width, const ve case OPCODE_BUFINFO: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BUFINFO\n"); -#endif - psContext->m_Reflection.OnDiagnostics("Metal shading language does not support buffer size query from shader. Pass the size to shader as const instead.\n", 0, false); // TODO: change this into error after modifying gfx-test 450 + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BUFINFO\n"); + } + psContext->m_Reflection.OnDiagnostics("Metal shading language does not support buffer size query from shader. Pass the size to shader as const instead.\n", 0, true); break; } case OPCODE_SAMPLE_INFO: { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_INFO\n"); -#endif + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_INFO\n"); + } const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, psInst->ui32PreciseMask, numParenthesis); bcatcstr(glsl, TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY).c_str()); bcatcstr(glsl, ".get_num_samples()"); AddAssignPrologue(numParenthesis); @@ -3921,7 +4004,7 @@ template vec bitFieldExtractI(const vec width, const ve bool isFP16 = false; if (psInst->asOperands[0].GetDataType(psContext) == SVT_FLOAT16) isFP16 = true; - AddAssignToDest(&psInst->asOperands[0], isFP16 ? SVT_FLOAT16 : SVT_FLOAT, dstCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[0], isFP16 ? 
SVT_FLOAT16 : SVT_FLOAT, dstCount, psInst->ui32PreciseMask, numParenthesis); bcatcstr(glsl, "clamp("); glsl << TranslateOperand(&psInst->asOperands[0], isFP16 ? TO_FLAG_FORCE_HALF : TO_AUTO_BITCAST_TO_FLOAT); @@ -3932,3 +4015,82 @@ template vec bitFieldExtractI(const vec width, const ve AddAssignPrologue(numParenthesis); } } + +#if ENABLE_UNIT_TESTS + +#define UNITY_EXTERNAL_TOOL 1 +#include "Projects/PrecompiledHeaders/UnityPrefix.h" // Needed for defines such as ENABLE_CPP_EXCEPTIONS +#include "Runtime/Testing/Testing.h" + +UNIT_TEST_SUITE(ToMetalInstructionTests) +{ + static void TestAddOpAssignToDest(const char* expect, SHADER_VARIABLE_TYPE srcType, uint32_t srcDim, SHADER_VARIABLE_TYPE dstType, uint32_t dstDim) + { + bstring actual = bfromcstralloc(20, ""); + bstring expected = bfromcstralloc(20, expect); + int parenthesis = 0; + AddOpAssignToDest(actual, srcType, srcDim, dstType, dstDim, 0, parenthesis); + CHECK(bstrcmp(actual, expected) == 0); + bdestroy(actual); + bdestroy(expected); + } + + TEST(AddOpAssignToDest_Works) + { + // Different Type + TestAddOpAssignToDest(" = as_type(", SVT_INT, 1, SVT_FLOAT, 1); + TestAddOpAssignToDest(" = uint(", SVT_INT, 1, SVT_UINT, 1); + TestAddOpAssignToDest(" = as_type(", SVT_FLOAT, 1, SVT_INT, 1); + TestAddOpAssignToDest(" = as_type(", SVT_FLOAT, 1, SVT_UINT, 1); + + TestAddOpAssignToDest(" = as_type(", SVT_INT16, 1, SVT_FLOAT16, 1); + TestAddOpAssignToDest(" = ushort(", SVT_INT16, 1, SVT_UINT16, 1); + TestAddOpAssignToDest(" = as_type(", SVT_FLOAT16, 1, SVT_INT16, 1); + TestAddOpAssignToDest(" = as_type(", SVT_FLOAT16, 1, SVT_UINT16, 1); + + // Simply assign + TestAddOpAssignToDest(" = ", SVT_UINT16, 1, SVT_UINT16, 1); + TestAddOpAssignToDest(" = ", SVT_INT, 4, SVT_INT, 2); + + // Up cast + TestAddOpAssignToDest(" = uint(", SVT_UINT16, 1, SVT_UINT, 1); + TestAddOpAssignToDest(" = float(", SVT_FLOAT16, 1, SVT_FLOAT, 1); + TestAddOpAssignToDest(" = int(", SVT_INT16, 1, SVT_INT, 1); + + // Down cast + 
TestAddOpAssignToDest(" = ushort(", SVT_UINT, 1, SVT_UINT16, 1); + TestAddOpAssignToDest(" = half(", SVT_FLOAT, 1, SVT_FLOAT16, 1); + TestAddOpAssignToDest(" = short(", SVT_INT, 1, SVT_INT16, 1); + + // Increase dimensions + TestAddOpAssignToDest(" = float4(", SVT_FLOAT, 1, SVT_FLOAT, 4); + TestAddOpAssignToDest(" = uint3(", SVT_UINT, 1, SVT_UINT, 3); + TestAddOpAssignToDest(" = uint2(", SVT_UINT, 1, SVT_UINT, 2); + + // Decrease dimensions + TestAddOpAssignToDest(" = ", SVT_FLOAT, 4, SVT_FLOAT, 1); + TestAddOpAssignToDest(" = ", SVT_UINT, 3, SVT_UINT, 1); + TestAddOpAssignToDest(" = ", SVT_UINT, 2, SVT_UINT, 1); + + // Reinterpret cast + Increase dimensions + TestAddOpAssignToDest(" = as_type(int4(", SVT_INT, 1, SVT_FLOAT, 4); + TestAddOpAssignToDest(" = uint4(", SVT_INT, 1, SVT_UINT, 4); + TestAddOpAssignToDest(" = as_type(float4(", SVT_FLOAT, 1, SVT_INT, 4); + TestAddOpAssignToDest(" = as_type(float4(", SVT_FLOAT, 1, SVT_UINT, 4); + + // Reinterpret cast + Decrease dimensions + TestAddOpAssignToDest(" = as_type(", SVT_INT, 4, SVT_FLOAT, 1); + TestAddOpAssignToDest(" = uint(", SVT_INT, 4, SVT_UINT, 1); + TestAddOpAssignToDest(" = as_type(", SVT_FLOAT, 4, SVT_INT, 1); + TestAddOpAssignToDest(" = as_type(", SVT_FLOAT, 4, SVT_UINT, 1); + + // Different precision + Different Type + TestAddOpAssignToDest(" = float4(", SVT_INT16, 4, SVT_FLOAT, 4); + TestAddOpAssignToDest(" = short4(", SVT_FLOAT, 4, SVT_INT16, 4); + + // Sanity check: low precision types are not used in Metal, so they should fall back + TestAddOpAssignToDest(" = short4(", SVT_FLOAT, 4, SVT_INT12, 4); + TestAddOpAssignToDest(" = half4(", SVT_INT, 4, SVT_FLOAT10, 4); + } +} +#endif diff --git a/src/toMetalOperand.cpp b/src/toMetalOperand.cpp index 0c55d7c..103d611 100644 --- a/src/toMetalOperand.cpp +++ b/src/toMetalOperand.cpp @@ -573,61 +573,61 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui { case SVT_FLOAT: ASSERT(psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber]
!= 0); - if (psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + if (psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] == 1) *pui32IgnoreSwizzle = 1; break; case SVT_FLOAT16: ASSERT(psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] != 0); oss << ("16_"); - if (psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + if (psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] == 1) *pui32IgnoreSwizzle = 1; break; case SVT_FLOAT10: ASSERT(psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] != 0); oss << ("10_"); - if (psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + if (psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] == 1) *pui32IgnoreSwizzle = 1; break; case SVT_INT: ASSERT(psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] != 0); oss << ("i"); - if (psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + if (psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] == 1) *pui32IgnoreSwizzle = 1; break; case SVT_INT16: ASSERT(psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] != 0); oss << ("i16_"); - if (psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + if (psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] == 1) *pui32IgnoreSwizzle = 1; break; case SVT_INT12: ASSERT(psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] != 0); oss << ("i12_"); - if (psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + if (psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] == 1) *pui32IgnoreSwizzle = 1; break; case SVT_UINT: ASSERT(psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] != 0); oss << ("u"); - 
if (psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + if (psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] == 1) *pui32IgnoreSwizzle = 1; break; case SVT_UINT16: ASSERT(psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] != 0); oss << ("u16_"); - if (psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + if (psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] == 1) *pui32IgnoreSwizzle = 1; break; case SVT_DOUBLE: ASSERT(psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] != 0); oss << ("d"); - if (psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + if (psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] == 1) *pui32IgnoreSwizzle = 1; break; case SVT_BOOL: ASSERT(psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] != 0); oss << ("b"); - if (psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + if (psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] == 1) *pui32IgnoreSwizzle = 1; break; default: @@ -843,7 +843,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui } } - if (psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle) + if (psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle) { switch (rebase) { @@ -887,7 +887,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui } } - if (psVarType && psVarType->Class == SVC_SCALAR) + if (psVarType->Class == SVC_SCALAR) { *pui32IgnoreSwizzle = 1;