bring latest changes from upstream ( f60ac47e0193 )

Antti Tapaninen 2020-09-23 14:10:02 -07:00
parent 45cd5126a8
commit bffde1436f
33 changed files with 3966 additions and 2430 deletions

View File

@ -4,8 +4,10 @@
#include <string>
#define UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "UNITY_RUNTIME_INSTANCING_ARRAY_SIZE"
#define UNITY_PRETRANSFORM_CONSTANT_NAME "UnityDisplayOrientationPreTransform"
const unsigned int kArraySizeConstantID = 0;
const unsigned int kPreTransformConstantID = 1;
// TODO: share with Runtime/GfxDevice/InstancingUtilities.h
inline bool IsUnityInstancingConstantBufferName(const char* cbName)
@ -13,3 +15,9 @@ inline bool IsUnityInstancingConstantBufferName(const char* cbName)
static const char kInstancedCbNamePrefix[] = "UnityInstancing";
return strncmp(cbName, kInstancedCbNamePrefix, sizeof(kInstancedCbNamePrefix) - 1) == 0;
}
inline bool IsPreTransformConstantBufferName(const char* cbName)
{
static const char kPreTransformCbNamePrefix[] = "UnityDisplayOrientationPreTransformData";
return strncmp(cbName, kPreTransformCbNamePrefix, sizeof(kPreTransformCbNamePrefix) - 1) == 0;
}
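A quick sketch of how these prefix checks behave, using hypothetical constant buffer names (not part of this commit):
#include <cassert>
inline void SketchCheckUnityCbNames()
{
    assert(IsUnityInstancingConstantBufferName("UnityInstancing_PerDraw"));              // "UnityInstancing" prefix matches
    assert(!IsUnityInstancingConstantBufferName("UnityPerDraw"));                        // no instancing prefix
    assert(IsPreTransformConstantBufferName("UnityDisplayOrientationPreTransformData")); // pretransform CB name matches
}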

View File

@ -198,6 +198,11 @@ public:
}
}
uint32_t PeekFirstFreeSlot() const
{
return m_FreeSlots.back();
}
uint32_t SaveTotalShaderStageAllocationsCount()
{
m_ShaderStageAllocations = m_Allocations.size();
@ -216,13 +221,37 @@ private:
//carry over any information needed about a different shader stage
//in order to construct valid GLSL shader combinations.
//Using GLSLCrossDependencyData is optional. However, without it some shader
//combinations may show link failures or runtime errors.
class GLSLCrossDependencyData
{
public:
struct GLSLBufferBindPointInfo
{
uint32_t slot;
bool known;
};
// A container for a single Vulkan resource binding (<set, binding> pair)
typedef std::pair<uint32_t, uint32_t> VulkanResourceBinding;
struct VulkanResourceBinding
{
uint32_t set;
uint32_t binding;
};
enum GLSLBufferType
{
BufferType_ReadWrite,
BufferType_Constant,
BufferType_SSBO,
BufferType_Texture,
BufferType_UBO,
BufferType_Count,
BufferType_Generic = BufferType_ReadWrite
};
private:
//Required if PixelInterpDependency is true
@ -240,6 +269,13 @@ private:
VulkanResourceBindings m_VulkanResourceBindings;
uint32_t m_NextAvailableVulkanResourceBinding[8]; // one per set.
typedef std::map<std::string, uint32_t> GLSLResouceBindings;
public:
GLSLResouceBindings m_GLSLResourceBindings;
uint32_t m_NextAvailableGLSLResourceBinding[BufferType_Count]; // UAVs, constants and buffers have separate binding ranges
uint32_t m_StructuredBufferBindPoints[MAX_RESOURCE_BINDINGS]; // for the old style bindings
inline int GetVaryingNamespace(SHADER_TYPE eShaderType, bool isInput)
{
switch (eShaderType)
@ -284,10 +320,6 @@ private:
}
}
typedef std::map<std::string, uint32_t> SpecializationConstantMap;
SpecializationConstantMap m_SpecConstantMap;
uint32_t m_NextSpecID;
public:
GLSLCrossDependencyData()
: eTessPartitioning(),
@ -297,28 +329,79 @@ public:
hasControlPoint(false),
hasPatchConstant(false),
ui32ProgramStages(0),
m_ExtBlendModes(),
m_NextSpecID(0)
m_ExtBlendModes()
{
memset(nextAvailableVaryingLocation, 0, sizeof(nextAvailableVaryingLocation));
memset(m_NextAvailableVulkanResourceBinding, 0, sizeof(m_NextAvailableVulkanResourceBinding));
memset(m_NextAvailableGLSLResourceBinding, 0, sizeof(m_NextAvailableGLSLResourceBinding));
}
// Retrieve the location for a varying with a given name.
// If the name doesn't already have an allocated location, allocate one
// and store it into the map.
inline uint32_t GetVaryingLocation(const std::string &name, SHADER_TYPE eShaderType, bool isInput)
inline uint32_t GetVaryingLocation(const std::string &name, SHADER_TYPE eShaderType, bool isInput, bool keepLocation, uint32_t maxSemanticIndex)
{
int nspace = GetVaryingNamespace(eShaderType, isInput);
VaryingLocations::iterator itr = varyingLocationsMap[nspace].find(name);
if (itr != varyingLocationsMap[nspace].end())
return itr->second;
if (keepLocation)
{
// Try to generate consistent varying locations based on the semantic indices in the HLSL source, i.e. "TEXCOORD11" gets assigned to layout(location = 11)
// Inspect last 2 characters in name
size_t len = name.length();
if (len > 1)
{
if (isdigit(name[len - 1]))
{
uint32_t index = 0;
if (isdigit(name[len - 2]))
index = atoi(&name[len - 2]); // 2-digit index
else
index = atoi(&name[len - 1]); // 1-digit index
if (index < 32) // Some platforms only allow 32 varying locations
{
// Check that index is not already used
bool canUseIndex = true;
for (VaryingLocations::iterator it = varyingLocationsMap[nspace].begin(); it != varyingLocationsMap[nspace].end(); ++it)
{
if (it->second == index)
{
canUseIndex = false;
break;
}
}
if (canUseIndex)
{
varyingLocationsMap[nspace].insert(std::make_pair(name, index));
return index;
}
}
}
}
// Fallback: pick an unused index (the max of the already-allocated indices and of the semantic indices found by SignatureAnalysis)
uint32_t maxIndexAlreadyAssigned = 0;
for (VaryingLocations::iterator it = varyingLocationsMap[nspace].begin(); it != varyingLocationsMap[nspace].end(); ++it)
maxIndexAlreadyAssigned = std::max(maxIndexAlreadyAssigned, it->second);
uint32_t fallbackIndex = std::max(maxIndexAlreadyAssigned + 1, maxSemanticIndex + 1);
varyingLocationsMap[nspace].insert(std::make_pair(name, fallbackIndex));
return fallbackIndex;
}
else
{
uint32_t newKey = nextAvailableVaryingLocation[nspace];
nextAvailableVaryingLocation[nspace]++;
varyingLocationsMap[nspace].insert(std::make_pair(name, newKey));
return newKey;
}
}
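A minimal sketch of the keepLocation behavior above, with hypothetical varying names (assuming VERTEX_SHADER is a valid SHADER_TYPE value):
GLSLCrossDependencyData deps;
// Name ends in "11", 11 < 32 and unused -> assigned layout(location = 11)
uint32_t locA = deps.GetVaryingLocation("TEXCOORD11", VERTEX_SHADER, /*isInput=*/false, /*keepLocation=*/true, /*maxSemanticIndex=*/11);
// No trailing digit -> fallback: max(already assigned, maxSemanticIndex) + 1, i.e. 12 here
uint32_t locB = deps.GetVaryingLocation("COLOR", VERTEX_SHADER, false, true, 11);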
// Retrieve the binding for a resource (texture, constant buffer, image) with a given name
// If not found, allocate a new one (in set 0) and return that
@ -326,7 +409,7 @@ public:
// If the name contains "hlslcc_set_X_bind_Y", those values (from the first occurrence found in the name)
// will be used instead, and all occurrences of that string will be removed from name, so the name parameter may be modified.
// If allocRoomForCounter is true, the following binding number in the same set will be allocated with name + '_counter'
inline std::pair<uint32_t, uint32_t> GetVulkanResourceBinding(std::string &name, bool allocRoomForCounter = false, uint32_t preferredSet = 0)
inline VulkanResourceBinding GetVulkanResourceBinding(std::string &name, bool allocRoomForCounter = false, uint32_t preferredSet = 0)
{
// scan for the special marker
const char *marker = "Xhlslcc_set_%d_bind_%dX";
@ -343,11 +426,11 @@ public:
name.erase(startLoc, endLoc - startLoc + 1);
}
// Add to map
VulkanResourceBinding newBind = std::make_pair(Set, Binding);
VulkanResourceBinding newBind = { Set, Binding };
m_VulkanResourceBindings.insert(std::make_pair(name, newBind));
if (allocRoomForCounter)
{
VulkanResourceBinding counterBind = std::make_pair(Set, Binding + 1);
VulkanResourceBinding counterBind = { Set, Binding + 1 };
m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind));
}
@ -359,18 +442,100 @@ public:
return itr->second;
// Allocate a new one
VulkanResourceBinding newBind = std::make_pair(preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet]);
VulkanResourceBinding newBind = { preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet] };
m_NextAvailableVulkanResourceBinding[preferredSet]++;
m_VulkanResourceBindings.insert(std::make_pair(name, newBind));
if (allocRoomForCounter)
{
VulkanResourceBinding counterBind = std::make_pair(preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet]);
VulkanResourceBinding counterBind = { preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet] };
m_NextAvailableVulkanResourceBinding[preferredSet]++;
m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind));
}
return newBind;
}
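A minimal sketch of the marker handling above, with hypothetical resource names; the exact placement of the marker inside the name is illustrative only:
GLSLCrossDependencyData deps;
std::string withMarker = "myTexXhlslcc_set_1_bind_3X";     // explicit set/binding encoded in the name
GLSLCrossDependencyData::VulkanResourceBinding a = deps.GetVulkanResourceBinding(withMarker);
// withMarker is now "myTex", a.set == 1, a.binding == 3
std::string plain = "otherTex";                            // no marker: allocated from preferredSet (0)
GLSLCrossDependencyData::VulkanResourceBinding b = deps.GetVulkanResourceBinding(plain);
// b.set == 0, b.binding == 0 on a fresh dependency object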
// GLSL bind point handling logic
// Handles both the 'old style' fill-around-fixed-UAVs scheme and the new style of partitioned offsets with fixed UAV locations.
// HLSL has separate register spaces for UAVs and structured buffers; GLSL has a shared register space for all buffers.
// The aim here is to preserve the UAV buffer bindings as they are and use the remaining binding points for structured buffers.
// In this step, make m_StructuredBufferBindPoints contain increasing uints starting from zero.
// This is only used when we are doing the old-style binding setup
void SetupGLSLResourceBindingSlotsIndices()
{
for (uint32_t i = 0; i < MAX_RESOURCE_BINDINGS; i++)
{
m_StructuredBufferBindPoints[i] = i;
}
}
void RemoveBindPointFromAvailableList(uint32_t bindPoint)
{
for (uint32_t i = 0; i < MAX_RESOURCE_BINDINGS - 1 && m_StructuredBufferBindPoints[i] <= bindPoint; i++)
{
if (m_StructuredBufferBindPoints[i] == bindPoint) // Remove the UAV binding point from the list by copying the array remainder over it
{
memcpy(&m_StructuredBufferBindPoints[i], &m_StructuredBufferBindPoints[i + 1], (MAX_RESOURCE_BINDINGS - 1 - i) * sizeof(uint32_t));
break;
}
}
}
void ReserveNamedBindPoint(const std::string &name, uint32_t bindPoint, GLSLBufferType type)
{
m_GLSLResourceBindings.insert(std::make_pair(name, bindPoint));
RemoveBindPointFromAvailableList(bindPoint);
}
bool ShouldUseBufferSpecificBinding(GLSLBufferType bufferType)
{
return bufferType == BufferType_Constant || bufferType == BufferType_Texture || bufferType == BufferType_UBO;
}
uint32_t GetGLSLBufferBindPointIndex(GLSLBufferType bufferType)
{
uint32_t binding = -1;
if (ShouldUseBufferSpecificBinding(bufferType))
{
binding = m_NextAvailableGLSLResourceBinding[bufferType];
}
else
{
binding = m_StructuredBufferBindPoints[m_NextAvailableGLSLResourceBinding[BufferType_Generic]];
}
return binding;
}
void UpdateResourceBindingIndex(GLSLBufferType bufferType)
{
if (ShouldUseBufferSpecificBinding(bufferType))
{
m_NextAvailableGLSLResourceBinding[bufferType]++;
}
else
{
m_NextAvailableGLSLResourceBinding[BufferType_Generic]++;
}
}
inline GLSLBufferBindPointInfo GetGLSLResourceBinding(const std::string &name, GLSLBufferType bufferType)
{
GLSLResouceBindings::iterator itr = m_GLSLResourceBindings.find(name);
if (itr != m_GLSLResourceBindings.end())
{
return GLSLBufferBindPointInfo{ itr->second, true };
}
uint32_t binding = GetGLSLBufferBindPointIndex(bufferType);
UpdateResourceBindingIndex(bufferType);
m_GLSLResourceBindings.insert(std::make_pair(name, binding));
return GLSLBufferBindPointInfo{ binding, false };
}
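A minimal sketch of the old-style bind point partitioning above, with hypothetical resource names:
GLSLCrossDependencyData deps;
deps.SetupGLSLResourceBindingSlotsIndices();                // available generic slots: 0, 1, 2, ...
deps.ReserveNamedBindPoint("myUAV", 2, GLSLCrossDependencyData::BufferType_ReadWrite); // UAV keeps binding 2
// Structured buffers fill the remaining generic slots, skipping the reserved one:
GLSLCrossDependencyData::GLSLBufferBindPointInfo a = deps.GetGLSLResourceBinding("bufA", GLSLCrossDependencyData::BufferType_SSBO); // slot 0, known == false
GLSLCrossDependencyData::GLSLBufferBindPointInfo b = deps.GetGLSLResourceBinding("bufB", GLSLCrossDependencyData::BufferType_SSBO); // slot 1
GLSLCrossDependencyData::GLSLBufferBindPointInfo c = deps.GetGLSLResourceBinding("bufC", GLSLCrossDependencyData::BufferType_SSBO); // slot 3 (2 was reserved)
// Constant buffers, textures and UBOs draw from their own per-type counters instead:
GLSLCrossDependencyData::GLSLBufferBindPointInfo t = deps.GetGLSLResourceBinding("myTex", GLSLCrossDependencyData::BufferType_Texture); // slot 0 in the texture range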
//dcl_tessellator_partitioning and dcl_tessellator_output_primitive appear in hull shader for D3D,
//but they appear on inputs inside domain shaders for GL.
//Hull shader must be compiled before domain so the
@ -437,23 +602,32 @@ public:
varyingLocationsMap[i].clear();
nextAvailableVaryingLocation[i] = 0;
}
m_NextSpecID = kArraySizeConstantID + 1;
m_SpecConstantMap.clear();
m_SharedFunctionMembers.clear();
m_SharedDependencies.clear();
}
// Retrieve or allocate a layout slot for Vulkan specialization constant
inline uint32_t GetSpecializationConstantSlot(const std::string &name)
bool IsHullShaderInputAlreadyDeclared(const std::string& name)
{
SpecializationConstantMap::iterator itr = m_SpecConstantMap.find(name);
if (itr != m_SpecConstantMap.end())
return itr->second;
bool isKnown = false;
m_SpecConstantMap.insert(std::make_pair(std::string(name), m_NextSpecID));
return m_NextSpecID++;
for (size_t idx = 0, end = m_hullShaderInputs.size(); idx < end; ++idx)
{
if (m_hullShaderInputs[idx] == name)
{
isKnown = true;
break;
}
}
return isKnown;
}
void RecordHullShaderInput(const std::string& name)
{
m_hullShaderInputs.push_back(name);
}
std::vector<std::string> m_hullShaderInputs;
};
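A minimal sketch of sharing one GLSLCrossDependencyData between stages, which is the scenario the comment at the top of the class describes; the translation calls are hypothetical since the full TranslateHLSLFromMem parameter list is not reproduced in this excerpt:
GLSLCrossDependencyData deps;
deps.SetupGLSLResourceBindingSlotsIndices();  // needed for the old-style GLSL bind point path
// Translate the vertex program first, then the fragment program, passing &deps both times so that
// varying locations and resource bindings agree between the two generated GLSL shaders:
//   TranslateHLSLFromMem(vertexDXBC,   flags, /* ... */, &deps, /* ... */, &vertexResult);
//   TranslateHLSLFromMem(fragmentDXBC, flags, /* ... */, &deps, /* ... */, &fragmentResult);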
struct GLSLShader
@ -491,6 +665,21 @@ public:
virtual void OnThreadGroupSize(unsigned int xSize, unsigned int ySize, unsigned int zSize) {}
virtual void OnTessellationInfo(uint32_t tessPartitionMode, uint32_t tessOutputWindingOrder, uint32_t tessMaxFactor, uint32_t tessNumPatchesInThreadGroup) {}
virtual void OnTessellationKernelInfo(uint32_t patchKernelBufferCount) {}
// These are Metal-only for now (but can be trivially added for other backends if needed).
// They are useful mostly for diagnostics, as interim values are actually hidden from the user.
virtual void OnVertexProgramOutput(const std::string& name, const std::string& semantic, int semanticIndex) {}
virtual void OnBuiltinOutput(SPECIAL_NAME name) {}
virtual void OnFragmentOutputDeclaration(int numComponents, int outputIndex) {}
enum AccessType
{
ReadAccess = 1 << 0,
WriteAccess = 1 << 1
};
virtual void OnStorageImage(int bindIndex, unsigned int access) {}
};
@ -543,10 +732,10 @@ static const unsigned int HLSLCC_FLAG_GLES31_IMAGE_QUALIFIERS = 0x1000;
static const unsigned int HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME = 0x2000;
// If set, adds location qualifiers to intra-shader varyings.
static const unsigned int HLSLCC_FLAG_SEPARABLE_SHADER_OBJECTS = 0x4000;
static const unsigned int HLSLCC_FLAG_SEPARABLE_SHADER_OBJECTS = 0x4000; // NOTE: obsolete flag (the behavior enabled by this flag became the default in 83a16a1829cf)
// If set, wraps all uniform buffer declarations in a preprocessor macro #ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS
// so that if that macro is defined, all UBO declarations will become normal uniforms
// If set, wraps all uniform buffer declarations in a preprocessor macro #ifdef HLSLCC_ENABLE_UNIFORM_BUFFERS
// so that if that macro is undefined, all UBO declarations will become normal uniforms
static const unsigned int HLSLCC_FLAG_WRAP_UBO = 0x8000;
// If set, skips all members of the $Globals constant buffer struct that are not referenced in the shader code
@ -567,8 +756,7 @@ static const unsigned int HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR = 0x80000;
// If set, avoid emitting atomic counters (ARB_shader_atomic_counters) and use the atomic functions provided by ARB_shader_storage_buffer_object instead.
static const unsigned int HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS = 0x100000;
// If set, and generating Vulkan shaders, attempts to detect static branching and transforms them into specialization constants
static const unsigned int HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS = 0x200000;
// Unused 0x200000;
// If set, this shader uses the GLSL extension EXT_shader_framebuffer_fetch
static const unsigned int HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH = 0x400000;
@ -586,6 +774,18 @@ static const unsigned int HLSLCC_FLAG_METAL_TESSELLATION = 0x2000000;
// Disable fastmath
static const unsigned int HLSLCC_FLAG_DISABLE_FASTMATH = 0x4000000;
// If set, explicit location qualifiers on uniforms are enabled (even if the language version doesn't support them)
static const unsigned int HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS = 0x8000000;
// If set, each line of the generated source will be preceded by a comment specifying which DirectX bytecode instruction it maps to
static const unsigned int HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS = 0x10000000;
// If set, try to generate consistent varying locations based on the semantic indices in the HLSL source, i.e. "TEXCOORD11" gets assigned to layout(location = 11)
static const unsigned int HLSLCC_FLAG_KEEP_VARYING_LOCATIONS = 0x20000000;
// Code generation might vary for mobile targets, e.g. using lower than full sampler precision by default
static const unsigned int HLSLCC_FLAG_MOBILE_TARGET = 0x40000000;
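A hedged example of combining the new flags above; the TranslateHLSLFromMem call is sketched with its remaining parameters elided, since they are not shown in this excerpt:
unsigned int flags = HLSLCC_FLAG_KEEP_VARYING_LOCATIONS        // "TEXCOORD11" -> layout(location = 11) where possible
                   | HLSLCC_FLAG_MOBILE_TARGET                 // mobile-specific codegen, e.g. sampler precision downgrade
                   | HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS; // annotate output with the originating DX bytecode instructions
// int ok = TranslateHLSLFromMem(dxbcBytes, flags, /* ...remaining parameters... */);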
#ifdef __cplusplus
extern "C" {
#endif

View File

@ -678,7 +678,7 @@ typedef uint_least64_t uint_fast64_t;
# elif defined(__i386__) || defined(_WIN32) || defined(WIN32)
# define stdint_intptr_bits 32
# elif defined(__INTEL_COMPILER)
/* TODO -- what did Intel do about x86-64? */
#error Unknown compiler
# endif
# ifdef stdint_intptr_bits
@ -711,9 +711,7 @@ typedef uint_least64_t uint_fast64_t;
typedef stdint_intptr_glue3 (uint, stdint_intptr_bits, _t) uintptr_t;
typedef stdint_intptr_glue3 (int, stdint_intptr_bits, _t) intptr_t;
# else
/* TODO -- This following is likely wrong for some platforms, and does
nothing for the definition of uintptr_t. */
typedef ptrdiff_t intptr_t;
#error Unknown compiler
# endif
# define STDINT_H_UINTPTR_T_DEFINED
#endif

View File

@ -9,7 +9,7 @@
using namespace HLSLcc::ControlFlow;
using HLSLcc::ForEachOperand;
const BasicBlock &ControlFlowGraph::Build(const Instruction *firstInstruction)
const BasicBlock &ControlFlowGraph::Build(const Instruction* firstInstruction, const Instruction* endInstruction)
{
using std::for_each;
@ -17,7 +17,7 @@ const BasicBlock &ControlFlowGraph::Build(const Instruction *firstInstruction)
m_BlockStorage.clear();
// Self-registering into m_BlockStorage so it goes out of scope when ControlFlowGraph does
BasicBlock *root = new BasicBlock(Utils::GetNextNonLabelInstruction(firstInstruction), *this, NULL);
BasicBlock *root = new BasicBlock(Utils::GetNextNonLabelInstruction(firstInstruction), *this, NULL, endInstruction);
// Build the reachable set for each block
bool hadChanges;
@ -58,10 +58,11 @@ BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *ins
// Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build().
// Auto-registers itself into ControlFlowGraph
BasicBlock::BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead)
BasicBlock::BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead, const Instruction* endInstruction)
: m_Graph(graph)
, m_First(psFirst)
, m_Last(NULL)
, m_End(endInstruction)
{
m_UEVar.clear();
m_VarKill.clear();
@ -94,7 +95,7 @@ BasicBlock::BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, cons
void BasicBlock::Build()
{
const Instruction *inst = m_First;
while (1)
while (inst != m_End)
{
// Process sources first
ForEachOperand(inst, inst + 1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND,
@ -158,7 +159,8 @@ void BasicBlock::Build()
default:
break;
case OPCODE_RET:
blockDone = true;
// Continue processing; in the case of unreachable code we still need to translate it properly (case 1160309)
// blockDone = true;
break;
case OPCODE_RETC:
// Basic block is done, start a next one.
@ -240,7 +242,7 @@ void BasicBlock::Build()
m_Reachable = m_DEDef;
// Tag the end of the basic block
m_Last = inst;
m_Last = std::max(m_First, std::min(inst, m_End - 1));
// printf("Basic Block %d -> %d\n", (int)m_First->id, (int)m_Last->id);
}
@ -256,7 +258,7 @@ BasicBlock * BasicBlock::AddChildBasicBlock(const Instruction *psFirst)
return b;
}
// Otherwise create one. Self-registering and self-connecting
return new BasicBlock(psFirst, m_Graph, m_First);
return new BasicBlock(psFirst, m_Graph, m_First, m_End);
}
bool BasicBlock::RebuildReachable()
@ -334,6 +336,7 @@ void BasicBlock::RVarUnion(ReachableVariables &a, const ReachableVariables &b)
#if ENABLE_UNIT_TESTS
#define UNITY_EXTERNAL_TOOL 1
#include "Projects/PrecompiledHeaders/UnityPrefix.h" // Needed for defines such as ENABLE_CPP_EXCEPTIONS
#include "Testing.h" // From Runtime/Testing
UNIT_TEST_SUITE(HLSLcc)
@ -348,7 +351,7 @@ UNIT_TEST_SUITE(HLSLcc)
};
ControlFlowGraph cfg;
const BasicBlock &root = cfg.Build(inst);
const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst));
CHECK_EQUAL(&inst[0], root.First());
CHECK_EQUAL(&inst[1], root.Last());
@ -403,7 +406,7 @@ UNIT_TEST_SUITE(HLSLcc)
};
ControlFlowGraph cfg;
const BasicBlock &root = cfg.Build(inst);
const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst));
CHECK_EQUAL(root.First(), &inst[0]);
CHECK_EQUAL(root.Last(), &inst[2]);
@ -539,7 +542,7 @@ UNIT_TEST_SUITE(HLSLcc)
};
ControlFlowGraph cfg;
const BasicBlock &root = cfg.Build(inst);
const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst));
CHECK_EQUAL(&inst[0], root.First());
CHECK_EQUAL(&inst[4], root.Last());
@ -699,7 +702,7 @@ UNIT_TEST_SUITE(HLSLcc)
};
ControlFlowGraph cfg;
const BasicBlock &root = cfg.Build(inst);
const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst));
CHECK_EQUAL(&inst[0], root.First());
CHECK_EQUAL(&inst[2], root.Last());

View File

@ -430,13 +430,17 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext,
case OPCODE_LD:
case OPCODE_LD_MS:
// TODO: Would need to know the sampler return type
MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
{
SHADER_VARIABLE_TYPE samplerReturnType = psInst->asOperands[2].aeDataType[0];
MarkOperandAs(&psInst->asOperands[0], samplerReturnType, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType);
break;
}
case OPCODE_MOVC:
MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType);
break;
case OPCODE_SWAPC:
MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType);
break;
@ -455,6 +459,7 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext,
MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
break;
}
break;
case OPCODE_SAMPLE_INFO:
// Sample_info uses the same RESINFO_RETURN_TYPE for storage. 0 = float, 1 = uint.
@ -594,7 +599,16 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext,
case OPCODE_DCL_RESOURCE_STRUCTURED:
case OPCODE_SYNC:
// TODO
case OPCODE_EVAL_SNAPPED:
case OPCODE_EVAL_SAMPLE_INDEX:
case OPCODE_EVAL_CENTROID:
case OPCODE_DCL_GS_INSTANCE_COUNT:
case OPCODE_ABORT:
case OPCODE_DEBUG_BREAK:
// Double not supported
case OPCODE_DADD:
case OPCODE_DMAX:
case OPCODE_DMIN:
@ -607,15 +621,7 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext,
case OPCODE_DMOVC:
case OPCODE_DTOF:
case OPCODE_FTOD:
case OPCODE_EVAL_SNAPPED:
case OPCODE_EVAL_SAMPLE_INDEX:
case OPCODE_EVAL_CENTROID:
case OPCODE_DCL_GS_INSTANCE_COUNT:
case OPCODE_ABORT:
case OPCODE_DEBUG_BREAK:*/
*/
default:
break;

View File

@ -7,6 +7,7 @@
#include "internal_includes/debug.h"
#include "internal_includes/Translator.h"
#include "internal_includes/ControlFlowGraph.h"
#include "internal_includes/languages.h"
#include "include/hlslcc.h"
#include <sstream>
@ -49,8 +50,8 @@ void HLSLCrossCompilerContext::DoDataTypeAnalysis(ShaderPhase *psPhase)
CalculateStandaloneDefinitions(duChains, psPhase->ui32TotalTemps);
// Only do sampler precision downgrade on pixel shaders.
if (psShader->eShaderType == PIXEL_SHADER)
// Only do sampler precision downgrade with pixel shaders on mobile targets / Switch
if (psShader->eShaderType == PIXEL_SHADER && (IsMobileTarget(this) || IsSwitch()))
UpdateSamplerPrecisions(psShader->sInfo, duChains, psPhase->ui32TotalTemps);
UDSplitTemps(&psPhase->ui32TotalTemps, duChains, udChains, psPhase->pui32SplitInfo);
@ -64,6 +65,55 @@ void HLSLCrossCompilerContext::DoDataTypeAnalysis(ShaderPhase *psPhase)
psPhase->psTempDeclaration->value.ui32NumTemps = psPhase->ui32TotalTemps;
}
void HLSLCrossCompilerContext::ReserveFramebufferFetchInputs()
{
if (psShader->eShaderType != PIXEL_SHADER)
return;
if (!psShader->extensions->EXT_shader_framebuffer_fetch)
return;
if ((flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH) == 0)
return;
if (!(psShader->eTargetLanguage >= LANG_ES_300 && psShader->eTargetLanguage <= LANG_ES_LAST))
return;
if (!psDependencies)
return;
if (!HaveUniformBindingsAndLocations(psShader->eTargetLanguage, psShader->extensions, flags) &&
((flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS) == 0 || (flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != 0))
return;
// The Adreno GLSL compiler fails to compile shaders that use the same location for textures and inout attachments,
// so here we figure out the maximum index of any inout render target and make sure we never use those locations for textures.
int maxInOutRenderTargetIndex = -1;
for (const Declaration& decl : psShader->asPhases[0].psDecl)
{
if (decl.eOpcode != OPCODE_DCL_INPUT_PS)
continue;
const Operand& operand = decl.asOperands[0];
if (!operand.iPSInOut)
continue;
const ShaderInfo::InOutSignature* signature = NULL;
if (!psShader->sInfo.GetInputSignatureFromRegister(operand.ui32RegisterNumber, operand.ui32CompMask, &signature, true))
continue;
const int index = signature->ui32SemanticIndex;
if (index > maxInOutRenderTargetIndex)
maxInOutRenderTargetIndex = index;
}
if (maxInOutRenderTargetIndex >= 0)
{
if (maxInOutRenderTargetIndex >= psDependencies->m_NextAvailableGLSLResourceBinding[GLSLCrossDependencyData::BufferType_Texture])
psDependencies->m_NextAvailableGLSLResourceBinding[GLSLCrossDependencyData::BufferType_Texture] = maxInOutRenderTargetIndex + 1;
}
}
void HLSLCrossCompilerContext::ClearDependencyData()
{
switch (psShader->eShaderType)

View File

@ -1,6 +1,7 @@
#include "hlslcc.h"
#include <memory>
#include <sstream>
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/toGLSL.h"
#include "internal_includes/toMetal.h"
@ -27,6 +28,27 @@
#define GL_COMPUTE_SHADER 0x91B9
#endif
static bool CheckConstantBuffersNoDuplicateNames(const std::vector<ConstantBuffer>& buffers, HLSLccReflection& reflectionCallbacks)
{
uint32_t count = buffers.size();
for (uint32_t i = 0; i < count; ++i)
{
const ConstantBuffer& lhs = buffers[i];
for (uint32_t j = i + 1; j < count; ++j)
{
const ConstantBuffer& rhs = buffers[j];
if (lhs.name == rhs.name)
{
std::ostringstream oss;
oss << "Duplicate constant buffer declaration: " << lhs.name;
reflectionCallbacks.OnDiagnostics(oss.str(), 0, true);
return false;
}
}
}
return true;
}
HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader,
unsigned int flags,
@ -49,6 +71,10 @@ HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader,
if (psShader.get())
{
Shader* shader = psShader.get();
if (!CheckConstantBuffersNoDuplicateNames(shader->sInfo.psConstantBuffers, reflectionCallbacks))
return 0;
HLSLCrossCompilerContext sContext(reflectionCallbacks);
// Add shader precisions from the list
@ -59,7 +85,11 @@ HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader,
flags &= ~HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS;
}
sContext.psShader = psShader.get();
#ifdef _DEBUG
flags |= HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS;
#endif
sContext.psShader = shader;
sContext.flags = flags;
// If dependencies == NULL, we'll create a dummy object for it so that there's always something there.
@ -68,6 +98,7 @@ HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader,
{
depPtr.reset(new GLSLCrossDependencyData());
sContext.psDependencies = depPtr.get();
sContext.psDependencies->SetupGLSLResourceBindingSlotsIndices();
}
else
sContext.psDependencies = dependencies;

View File

@ -96,8 +96,7 @@ namespace HLSLcc
}
}
const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType,
const int components)
const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, const int components)
{
static const char * const uintTypes[] = { " ", "uint", "uint2", "uint3", "uint4" };
static const char * const ushortTypes[] = { " ", "ushort", "ushort2", "ushort3", "ushort4" };
@ -114,6 +113,7 @@ namespace HLSLcc
case SVT_UINT:
return uintTypes[components];
case SVT_UINT16:
case SVT_UINT8: // there is no uint8 in Metal, so treat it as ushort
return ushortTypes[components];
case SVT_INT:
return intTypes[components];
@ -304,6 +304,47 @@ namespace HLSLcc
}
}
RESOURCE_RETURN_TYPE SVTTypeToResourceReturnType(SHADER_VARIABLE_TYPE type)
{
switch (type)
{
case SVT_INT:
case SVT_INT12:
case SVT_INT16:
return RETURN_TYPE_SINT;
case SVT_UINT:
case SVT_UINT16:
return RETURN_TYPE_UINT;
case SVT_FLOAT:
case SVT_FLOAT10:
case SVT_FLOAT16:
return RETURN_TYPE_FLOAT;
default:
return RETURN_TYPE_UNUSED;
}
}
REFLECT_RESOURCE_PRECISION SVTTypeToPrecision(SHADER_VARIABLE_TYPE type)
{
switch (type)
{
case SVT_INT:
case SVT_UINT:
case SVT_FLOAT:
return REFLECT_RESOURCE_PRECISION_HIGHP;
case SVT_INT16:
case SVT_UINT16:
case SVT_FLOAT16:
return REFLECT_RESOURCE_PRECISION_MEDIUMP;
case SVT_INT12:
case SVT_FLOAT10:
case SVT_UINT8:
return REFLECT_RESOURCE_PRECISION_LOWP;
default:
return REFLECT_RESOURCE_PRECISION_UNKNOWN;
}
}
uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount)
{
return TO_AUTO_EXPAND_TO_VEC2 << (elemCount - 2);
@ -454,8 +495,8 @@ namespace HLSLcc
if (context->psShader->eTargetLanguage == LANG_METAL)
{
// avoid compiler error: cannot use as_type to cast from 'half' to 'unsigned int', types of different size
if ((src == SVT_FLOAT16 || src == SVT_FLOAT10) && (dest == SVT_UINT))
// avoid compiler error: cannot use as_type to cast from 'half' to 'unsigned int' or 'int', types of different size
if ((src == SVT_FLOAT16 || src == SVT_FLOAT10) && (dest == SVT_UINT || dest == SVT_INT))
return true;
}

View File

@ -9,7 +9,7 @@ uint32_t Operand::GetAccessMask() const
{
int i;
uint32_t accessMask = 0;
// TODO: Destination writemask can (AND DOES) affect access from sources, but do it conservatively for now.
// NOTE: Destination writemask can (AND DOES) affect access from sources, but we do it conservatively for now.
switch (eSelMode)
{
default:
@ -245,6 +245,10 @@ int Operand::GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const
SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates /* = SVT_INT */) const
{
// indexable temps (temp arrays) are always float
if (eType == OPERAND_TYPE_INDEXABLE_TEMP)
return SVT_FLOAT;
// The min precision qualifier overrides all of the stuff below
switch (eMinPrecision)
{
@ -468,15 +472,12 @@ SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, S
{
int foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags);
if (foundVar)
{
return psVarType->Type;
}
ASSERT(0);
}
else
{
// Todo: this isn't correct yet.
return SVT_FLOAT;
}
ASSERT(0);
break;
}
case OPERAND_TYPE_IMMEDIATE32:
@ -520,7 +521,6 @@ SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, S
return psContext->IsVulkan() ? SVT_UINT : SVT_FLOAT;
}
case OPERAND_TYPE_INDEXABLE_TEMP: // Indexable temps are always floats
default:
{
return SVT_FLOAT;
@ -572,7 +572,6 @@ int Operand::GetNumInputElements(const HLSLCrossCompilerContext *psContext) cons
ASSERT(psSig != NULL);
// TODO: Are there ever any cases where the mask has 'holes'?
return HLSLcc::GetNumberBitsSet(psSig->ui32Mask);
}
@ -603,9 +602,9 @@ Operand* Operand::GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, co
else if (psDynIndexOrigin->eOpcode == OPCODE_IMUL)
{
// check which one of the src operands is the original index
if ((asOps[2].eType == OPERAND_TYPE_TEMP || asOps[2].eType == OPERAND_TYPE_INPUT) && asOps[3].eType == OPERAND_TYPE_IMMEDIATE32)
if ((asOps[2].eType == OPERAND_TYPE_TEMP || asOps[2].eType == OPERAND_TYPE_INPUT || asOps[2].eType == OPERAND_TYPE_CONSTANT_BUFFER) && asOps[3].eType == OPERAND_TYPE_IMMEDIATE32)
psOriginOp = &asOps[2];
else if ((asOps[3].eType == OPERAND_TYPE_TEMP || asOps[3].eType == OPERAND_TYPE_INPUT) && asOps[2].eType == OPERAND_TYPE_IMMEDIATE32)
else if ((asOps[3].eType == OPERAND_TYPE_TEMP || asOps[3].eType == OPERAND_TYPE_INPUT || asOps[3].eType == OPERAND_TYPE_CONSTANT_BUFFER) && asOps[2].eType == OPERAND_TYPE_IMMEDIATE32)
psOriginOp = &asOps[3];
}
else if (psDynIndexOrigin->eOpcode == OPCODE_ISHL)

View File

@ -65,51 +65,11 @@ void Shader::ConsolidateHullTempVars()
}
}
// HLSL has separate register spaces for UAV and structured buffers. GLSL has shared register space for all buffers.
// The aim here is to preserve the UAV buffer bindings as they are and use remaining binding points for structured buffers.
// In this step make aui32StructuredBufferBindingPoints contain increasingly ordered uints starting from zero.
void Shader::PrepareStructuredBufferBindingSlots()
{
uint32_t i;
for (i = 0; i < MAX_RESOURCE_BINDINGS; i++)
{
aui32StructuredBufferBindingPoints[i] = i;
}
}
// Go through all declarations and remove UAV occupied binding points from the aui32StructuredBufferBindingPoints list
void Shader::ResolveStructuredBufferBindingSlots(ShaderPhase *psPhase)
{
uint32_t p;
std::vector<uint32_t> &bindingArray = aui32StructuredBufferBindingPoints;
for (p = 0; p < psPhase->psDecl.size(); ++p)
{
if (psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW ||
psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED)
{
uint32_t uav = psPhase->psDecl[p].asOperands[0].ui32RegisterNumber; // uav binding point
uint32_t i;
// Find uav binding point from the list. Drop search if not found.
for (i = 0; i < MAX_RESOURCE_BINDINGS && bindingArray[i] <= uav; i++)
{
if (bindingArray[i] == uav) // Remove uav binding point from the list by copying array remainder here
{
memcpy(&bindingArray[i], &bindingArray[i + 1], (MAX_RESOURCE_BINDINGS - 1 - i) * sizeof(uint32_t));
break;
}
}
}
}
}
// The image (RWTexture in HLSL) declaration op does not provide enough info about the format and access pattern.
// Go through all image declarations and the instructions accessing them to see if each is readonly/writeonly.
// While doing that we also get the number of components expected in the image format.
// Resolve access flags for other UAVs as well; no component count resolving for them.
void ShaderPhase::ResolveUAVProperties()
void ShaderPhase::ResolveUAVProperties(const ShaderInfo& sInfo)
{
Declaration *psFirstDeclaration = &psDecl[0];
@ -163,8 +123,10 @@ void ShaderPhase::ResolveUAVProperties()
case OPCODE_ATOMIC_XOR:
case OPCODE_ATOMIC_IMIN:
case OPCODE_ATOMIC_UMIN:
case OPCODE_ATOMIC_IMAX:
case OPCODE_ATOMIC_UMAX:
opIndex = 0;
accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE;
accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC;
numComponents = 1;
break;
@ -179,7 +141,7 @@ void ShaderPhase::ResolveUAVProperties()
case OPCODE_IMM_ATOMIC_EXCH:
case OPCODE_IMM_ATOMIC_CMP_EXCH:
opIndex = 1;
accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE;
accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC;
numComponents = 1;
break;
@ -211,7 +173,7 @@ void ShaderPhase::ResolveUAVProperties()
case OPCODE_IMM_ATOMIC_ALLOC:
case OPCODE_IMM_ATOMIC_CONSUME:
opIndex = 1;
accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE;
accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC;
numComponents = 0;
break;
@ -235,6 +197,16 @@ void ShaderPhase::ResolveUAVProperties()
psDecl->sUAV.ui32NumComponents = numComponents;
}
}
if (psDecl->eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED)
{
const ResourceBinding* psBinding = 0;
if (sInfo.GetResourceFromBindingPoint(RGROUP_UAV, uavReg, &psBinding))
{
// component count is stored in flags as 2 bits, 00: vec1, 01: vec2, 10: vec3, 11: vec4
psDecl->sUAV.ui32NumComponents = ((psBinding->ui32Flags >> 2) & 3) + 1;
}
}
}
}
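ResolveUAVProperties now takes the reflection info so that typed UAV component counts can be read from the resource binding flags; a hypothetical call site (the actual caller is outside this diff) would look like:
for (ShaderPhase& phase : psShader->asPhases)
    phase.ResolveUAVProperties(psShader->sInfo);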
@ -601,6 +573,18 @@ void Shader::AnalyzeIOOverlap()
}
}
void Shader::SetMaxSemanticIndex()
{
for (std::vector<ShaderInfo::InOutSignature>::iterator it = sInfo.psInputSignatures.begin(); it != sInfo.psInputSignatures.end(); ++it)
maxSemanticIndex = std::max(maxSemanticIndex, it->ui32SemanticIndex);
for (std::vector<ShaderInfo::InOutSignature>::iterator it = sInfo.psOutputSignatures.begin(); it != sInfo.psOutputSignatures.end(); ++it)
maxSemanticIndex = std::max(maxSemanticIndex, it->ui32SemanticIndex);
for (std::vector<ShaderInfo::InOutSignature>::iterator it = sInfo.psPatchConstantSignatures.begin(); it != sInfo.psPatchConstantSignatures.end(); ++it)
maxSemanticIndex = std::max(maxSemanticIndex, it->ui32SemanticIndex);
}
// In DX bytecode, all const arrays are vec4's, and all arrays are stuffed into one large array.
// Luckily, each chunk is always accessed with a suboperand plus <constant> (in ui32RegisterNumber),
// so do an analysis pass. Also trim the vec4's into smaller formats if the extra components are never read.
@ -753,7 +737,7 @@ HLSLcc::ControlFlow::ControlFlowGraph &ShaderPhase::GetCFG()
{
if (!m_CFGInitialized)
{
m_CFG.Build(&psInst[0]);
m_CFG.Build(psInst.data(), psInst.data() + psInst.size());
m_CFGInitialized = true;
}

View File

@ -4,6 +4,8 @@
#include "Operand.h"
#include <stdlib.h>
#include <sstream>
#include <cctype>
SHADER_VARIABLE_TYPE ShaderInfo::GetTextureDataType(uint32_t regNo)
{
@ -385,24 +387,133 @@ ResourceGroup ShaderInfo::ResourceTypeToResourceGroup(ResourceType eType)
return RGROUP_CBUFFER;
}
static inline std::string GetTextureNameFromSamplerName(const std::string& samplerIn)
{
ASSERT(samplerIn.compare(0, 7, "sampler") == 0);
// Please note that we do not have hard rules about how sampler names should be structured;
// what's more, they can even omit the texture name (but that should be handled separately).
// How we try to deduce the texture name: we remove known tokens and take the leftmost (first) "word".
// Note that we want to support C-style naming (with underscores for spaces);
// as it is pretty normal to have a texture name starting with an underscore,
// we bind underscores "to the right".
// Note that we want sampler state matching to be case insensitive:
// while checking for a match could be done with strncasecmp/_strnicmp,
// Windows is missing a case-insensitive "find substring" (strcasestr), so we transform to lowercase instead.
std::string sampler = samplerIn;
for (std::string::iterator i = sampler.begin(), in = sampler.end(); i != in; ++i)
*i = std::tolower(*i);
struct Token { const char* str; int len; };
#define TOKEN(s) { s, (int)strlen(s) }
Token token[] = {
TOKEN("compare"),
TOKEN("point"), TOKEN("trilinear"), TOKEN("linear"),
TOKEN("clamp"), TOKEN("clampu"), TOKEN("clampv"), TOKEN("clampw"),
TOKEN("repeat"), TOKEN("repeatu"), TOKEN("repeatv"), TOKEN("repeatw"),
TOKEN("mirror"), TOKEN("mirroru"), TOKEN("mirrorv"), TOKEN("mirrorw"),
TOKEN("mirroronce"), TOKEN("mirroronceu"), TOKEN("mirroroncev"), TOKEN("mirroroncew"),
};
#undef TOKEN
const char* s = sampler.c_str();
for (int texNameStart = 7; s[texNameStart];)
{
// skip underscores and find the potential beginning of a token
int tokenStart = texNameStart, tokenEnd = -1;
while (s[tokenStart] == '_')
++tokenStart;
// check token list for matches
for (int i = 0, n = sizeof(token) / sizeof(token[0]); i < n && tokenEnd < 0; ++i)
if (strncmp(s + tokenStart, token[i].str, token[i].len) == 0)
tokenEnd = tokenStart + token[i].len;
if (tokenEnd < 0)
{
// we have found texture name
// find next token
int nextTokenStart = sampler.length();
for (int i = 0, n = sizeof(token) / sizeof(token[0]); i < n; ++i)
{
// again: note that we want to be case insensitive
const int pos = sampler.find(token[i].str, tokenStart);
if (pos != std::string::npos && pos < nextTokenStart)
nextTokenStart = pos;
}
// check preceding underscores, but only if we have found an actual token (not the end of the string)
if (nextTokenStart < sampler.length())
{
while (nextTokenStart > tokenStart && s[nextTokenStart - 1] == '_')
--nextTokenStart;
}
// note that we return the substring of the initial sampler name to preserve case
return samplerIn.substr(texNameStart, nextTokenStart - texNameStart);
}
else
{
// we have found known token
texNameStart = tokenEnd;
}
}
// if we ended up here, the texture name is missing
return "";
}
// note that we don't have the means right now to have unit tests in hlslcc, so we do poor man's testing below
// AddSamplerPrecisions is called once for every program, so it is easy to uncomment and test
static inline void Test_GetTextureNameFromSamplerName()
{
#define CHECK(s, t) ASSERT(GetTextureNameFromSamplerName(std::string(s)) == std::string(t))
CHECK("sampler_point_clamp", "");
CHECK("sampler_point_clamp_Tex", "_Tex");
CHECK("sampler_point_clamp_Tex__", "_Tex__");
CHECK("sampler_______point_Tex", "_Tex");
CHECK("samplerPointClamp", "");
CHECK("samplerPointClamp_Tex", "_Tex");
CHECK("samplerPointClamp_Tex__", "_Tex__");
CHECK("samplerPointTexClamp", "Tex");
CHECK("samplerPoint_TexClamp", "_Tex");
CHECK("samplerPoint_Tex_Clamp", "_Tex");
#undef CHECK
}
void ShaderInfo::AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info)
{
if (info.empty())
return;
#if _DEBUG && 0
Test_GetTextureNameFromSamplerName();
#endif
for (size_t i = 0; i < psResourceBindings.size(); i++)
{
ResourceBinding *rb = &psResourceBindings[i];
if (rb->eType != RTYPE_SAMPLER && rb->eType != RTYPE_TEXTURE)
if (rb->eType != RTYPE_SAMPLER && rb->eType != RTYPE_TEXTURE && rb->eType != RTYPE_UAV_RWTYPED)
continue;
HLSLccSamplerPrecisionInfo::iterator j = info.find(rb->name); // Try finding exact match
// Try finding the exact match
HLSLccSamplerPrecisionInfo::iterator j = info.find(rb->name);
// If match not found, check if name has "sampler" prefix
// -> try finding a match without the prefix (DX11 style sampler case)
// If no match is found, check if the name has a "sampler" prefix (DX11-style sampler case);
// then we try to recover the texture name from the sampler name
if (j == info.end() && rb->name.compare(0, 7, "sampler") == 0)
j = info.find(rb->name.substr(7, rb->name.size() - 7));
j = info.find(GetTextureNameFromSamplerName(rb->name));
// note that if we didn't find the respective texture, we cannot say anything about sampler precision;
// currently it will become "unknown", resulting in half format, even if we use it to sample a texture explicitly marked as float
// TODO: should we somehow allow overriding it?
if (j != info.end())
rb->ePrecision = j->second;
}

View File

@ -582,8 +582,6 @@ static int AttemptSplitDefinitions(SplitDefinitions &defs, uint32_t *psNumTemps,
// Add this define and all its siblings to the table and try again
AddDefineToList(defs, *dl);
return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable);
canSplit = 0;
break;
}
dl++;
@ -642,61 +640,6 @@ void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineCh
}
}
// Returns nonzero if all the operands have partial precision and at least one of them has been downgraded as part of shader downgrading process.
// Sampler ops, bitwise ops and comparisons are ignored.
static int CanDowngradeDefinitionPrecision(DefineUseChain::iterator du, OPERAND_MIN_PRECISION *pType)
{
Instruction *psInst = du->psInst;
int hasFullPrecOperands = 0;
uint32_t i;
if (du->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT)
return 0;
switch (psInst->eOpcode)
{
case OPCODE_ADD:
case OPCODE_MUL:
case OPCODE_MOV:
case OPCODE_MAD:
case OPCODE_DIV:
case OPCODE_LOG:
case OPCODE_EXP:
case OPCODE_MAX:
case OPCODE_MIN:
case OPCODE_DP2:
case OPCODE_DP2ADD:
case OPCODE_DP3:
case OPCODE_DP4:
case OPCODE_RSQ:
case OPCODE_SQRT:
break;
default:
return 0;
}
for (i = psInst->ui32FirstSrc; i < psInst->ui32NumOperands; i++)
{
Operand *op = &psInst->asOperands[i];
if (op->eType == OPERAND_TYPE_IMMEDIATE32)
continue; // Immediate values are ignored
if (op->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT)
{
hasFullPrecOperands = 1;
break;
}
}
if (hasFullPrecOperands)
return 0;
if (pType)
*pType = OPERAND_MIN_PRECISION_FLOAT_16; // Don't go lower than mediump
return 1;
}
// Returns true if all the usages of this definitions are instructions that deal with floating point data
static bool HasOnlyFloatUsages(DefineUseChain::iterator du)
{
@ -747,8 +690,7 @@ void UpdateSamplerPrecisions(const ShaderInfo &info, DefineUseChains &psDUChains
while (du != psDUChains[i].end())
{
OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT;
if ((du->psInst->IsPartialPrecisionSamplerInstruction(info, &sType)
|| CanDowngradeDefinitionPrecision(du, &sType))
if (du->psInst->IsPartialPrecisionSamplerInstruction(info, &sType)
&& du->psInst->asOperands[0].eType == OPERAND_TYPE_TEMP
&& du->psInst->asOperands[0].eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT
&& du->isStandalone

View File

@ -696,7 +696,6 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token,
// int iTupleSrc = 0, iTupleDest = 0;
//const uint32_t ui32ConstCount = pui32Token[1] - 2;
//const uint32_t ui32TupleCount = (ui32ConstCount / 4);
/*CUSTOMDATA_CLASS eClass =*/ DecodeCustomDataClass(pui32Token[0]);
const uint32_t ui32NumVec4 = (ui32TokenLength - 2) / 4;
@ -841,6 +840,7 @@ const uint32_t* DecodeInstruction(const uint32_t* pui32Token, Instruction* psIns
psInst->eOpcode = eOpcode;
psInst->bSaturate = DecodeInstructionSaturate(*pui32Token);
psInst->ui32PreciseMask = DecodeInstructionPreciseMask(*pui32Token);
psInst->bAddressOffset = 0;
@ -1386,7 +1386,6 @@ const void AllocateHullPhaseArrays(const uint32_t* pui32Tokens,
while (1) //Keep going until we reach the first non-declaration token, or the end of the shader.
{
uint32_t ui32TokenLength = DecodeInstructionLength(*pui32CurrentToken);
/*const uint32_t bExtended =*/ DecodeIsOpcodeExtended(*pui32CurrentToken);
const OPCODE_TYPE eOpcode = DecodeOpcodeType(*pui32CurrentToken);
if (eOpcode == OPCODE_CUSTOMDATA)

View File

@ -30,7 +30,7 @@ namespace ControlFlow
typedef std::vector<shared_ptr<BasicBlock> > BasicBlockStorage;
const BasicBlock &Build(const Instruction *firstInstruction);
const BasicBlock &Build(const Instruction* firstInstruction, const Instruction* endInstruction);
// Only works for instructions that start the basic block
const BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction) const;
@ -62,15 +62,17 @@ namespace ControlFlow
struct Definition
{
Definition(const Instruction *i = NULL, const Operand *o = NULL)
Definition(const Instruction* i = nullptr, const Operand* o = nullptr)
: m_Instruction(i)
, m_Operand(o)
{}
Definition(const Definition &a)
: m_Instruction(a.m_Instruction)
, m_Operand(a.m_Operand)
{}
Definition(const Definition& a) = default;
Definition(Definition&& a) = default;
~Definition() = default;
Definition& operator=(const Definition& a) = default;
Definition& operator=(Definition&& a) = default;
bool operator==(const Definition& a) const
{
@ -118,7 +120,7 @@ namespace ControlFlow
private:
// Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build()
BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead);
BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead, const Instruction* psEnd);
// Walk through the instructions and build UEVar and VarKill sets, create succeeding nodes if they don't exist already.
void Build();
@ -133,6 +135,7 @@ namespace ControlFlow
const Instruction *m_First; // The first instruction in the basic block
const Instruction *m_Last; // The last instruction in the basic block. Either OPCODE_RET or a branch/jump/loop instruction
const Instruction *m_End; // past-the-end pointer
RegisterSet m_UEVar; // Upwards-exposed variables (temps that need definition from upstream and are used in this basic block)
RegisterSet m_VarKill; // Set of variables that are defined in this block.

View File

@ -15,14 +15,16 @@ typedef struct ICBVec4_TAG
#define ACCESS_FLAG_READ 0x1
#define ACCESS_FLAG_WRITE 0x2
#define ACCESS_FLAG_ATOMIC 0x4
struct Declaration
{
Declaration()
:
Declaration() :
eOpcode(OPCODE_INVALID),
ui32NumOperands(0),
ui32BufferStride(0)
ui32BufferStride(0),
ui32TableLength(0),
ui32IsShadowTex(0)
{}
OPCODE_TYPE eOpcode;
@ -85,6 +87,12 @@ struct Declaration
{
uint32_t ui32Stride;
uint32_t ui32Count;
TGSM_TAG() :
ui32Stride(0),
ui32Count(0)
{
}
} sTGSM;
struct IndexableTemp_TAG
@ -92,6 +100,13 @@ struct Declaration
uint32_t ui32RegIndex;
uint32_t ui32RegCount;
uint32_t ui32RegComponentSize;
IndexableTemp_TAG() :
ui32RegIndex(0),
ui32RegCount(0),
ui32RegComponentSize(0)
{
}
} sIdxTemp;
uint32_t ui32TableLength;

View File

@ -15,7 +15,21 @@ class HLSLccReflection;
class HLSLCrossCompilerContext
{
public:
HLSLCrossCompilerContext(HLSLccReflection &refl) : m_Reflection(refl) {}
HLSLCrossCompilerContext(HLSLccReflection &refl) :
glsl(nullptr),
extensions(nullptr),
beforeMain(nullptr),
currentGLSLString(nullptr),
currentPhase(0),
indent(0),
flags(0),
psShader(nullptr),
psDependencies(nullptr),
inputPrefix(nullptr),
outputPrefix(nullptr),
psTranslator(nullptr),
m_Reflection(refl)
{}
bstring glsl;
bstring extensions;
@ -42,6 +56,7 @@ public:
const char *outputPrefix; // Prefix for shader outputs
void DoDataTypeAnalysis(ShaderPhase *psPhase);
void ReserveFramebufferFetchInputs();
void ClearDependencyData();

View File

@ -41,6 +41,10 @@ namespace HLSLcc
SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec);
RESOURCE_RETURN_TYPE SVTTypeToResourceReturnType(SHADER_VARIABLE_TYPE type);
REFLECT_RESOURCE_PRECISION SVTTypeToPrecision(SHADER_VARIABLE_TYPE type);
uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount);
bool IsOperationCommutative(int /* OPCODE_TYPE */ eOpCode);

View File

@ -21,23 +21,51 @@
struct Instruction
{
Instruction()
: eOpcode(OPCODE_NOP)
, eBooleanTestType(INSTRUCTION_TEST_ZERO)
, ui32NumOperands(0)
, ui32FirstSrc(0)
, m_Uses()
, m_SkipTranslation(false)
, m_InductorRegister(0)
, bSaturate(0)
, m_IsStaticBranch(false)
, m_StaticBranchCondition(NULL)
Instruction() :
eOpcode(OPCODE_NOP),
eBooleanTestType(INSTRUCTION_TEST_ZERO),
ui32NumOperands(0),
ui32FirstSrc(0),
m_Uses(),
m_SkipTranslation(false),
m_InductorRegister(0),
bSaturate(0),
ui32SyncFlags(0),
ui32PreciseMask(0),
ui32FuncIndexWithinInterface(0),
eResInfoReturnType(RESINFO_INSTRUCTION_RETURN_FLOAT),
bAddressOffset(0),
iUAddrOffset(0),
iVAddrOffset(0),
iWAddrOffset(0),
xType(RETURN_TYPE_UNUSED),
yType(RETURN_TYPE_UNUSED),
zType(RETURN_TYPE_UNUSED),
wType(RETURN_TYPE_UNUSED),
eResDim(RESOURCE_DIMENSION_UNKNOWN),
iCausedSplit(0),
id(0)
{
m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0;
}
// For creating unit tests only. Create an instruction with temps (unless reg is 0xffffffff in which case use OPERAND_TYPE_INPUT/OUTPUT)
Instruction(uint64_t _id, OPCODE_TYPE opcode, uint32_t reg1 = 0, uint32_t reg1Mask = 0, uint32_t reg2 = 0, uint32_t reg2Mask = 0, uint32_t reg3 = 0, uint32_t reg3Mask = 0, uint32_t reg4 = 0, uint32_t reg4Mask = 0)
Instruction(uint64_t _id, OPCODE_TYPE opcode, uint32_t reg1 = 0, uint32_t reg1Mask = 0, uint32_t reg2 = 0, uint32_t reg2Mask = 0, uint32_t reg3 = 0, uint32_t reg3Mask = 0, uint32_t reg4 = 0, uint32_t reg4Mask = 0) :
ui32SyncFlags(0),
bSaturate(0),
ui32PreciseMask(0),
ui32FuncIndexWithinInterface(0),
eResInfoReturnType(RESINFO_INSTRUCTION_RETURN_FLOAT),
bAddressOffset(0),
iUAddrOffset(0),
iVAddrOffset(0),
iWAddrOffset(0),
xType(RETURN_TYPE_UNUSED),
yType(RETURN_TYPE_UNUSED),
zType(RETURN_TYPE_UNUSED),
wType(RETURN_TYPE_UNUSED),
eResDim(RESOURCE_DIMENSION_UNKNOWN),
iCausedSplit(0)
{
id = _id;
eOpcode = opcode;
@ -119,6 +147,7 @@ struct Instruction
uint32_t ui32FirstSrc;
Operand asOperands[6];
uint32_t bSaturate;
uint32_t ui32PreciseMask;
uint32_t ui32FuncIndexWithinInterface;
RESINFO_RETURN_TYPE eResInfoReturnType;
@ -130,15 +159,16 @@ struct Instruction
RESOURCE_DIMENSION eResDim;
int8_t iCausedSplit; // Nonzero if has caused a temp split. Later used by sampler datatype tweaking
bool m_IsStaticBranch; // If true, this instruction is a static branch
const Instruction *m_StaticBranchCondition; // If this is a static branch, this instruction points to the condition instruction. Can also be NULL if the operand itself is the condition
std::string m_StaticBranchName; // The name of the static branch variable, with the condition encoded in it.
struct Use
{
Use() : m_Inst(0), m_Op(0) {}
Use(const Use &a) : m_Inst(a.m_Inst), m_Op(a.m_Op) {}
Use(const Use& a) = default;
Use(Use&& a) = default;
Use(Instruction* inst, Operand* op) : m_Inst(inst), m_Op(op) {}
~Use() = default;
Use& operator=(const Use& a) = default;
Use& operator=(Use&& a) = default;
Instruction* m_Inst; // The instruction that references the result of this instruction
Operand* m_Op; // The operand within the instruction above. Note: can also be suboperand.

View File

@ -129,8 +129,13 @@ public:
struct Define
{
Define() : m_Inst(0), m_Op(0) {}
Define(const Define &a) : m_Inst(a.m_Inst), m_Op(a.m_Op) {}
Define(const Define& a) = default;
Define(Define&& a) = default;
Define(Instruction* inst, Operand* op) : m_Inst(inst), m_Op(op) {}
~Define() = default;
Define& operator=(const Define& other) = default;
Define& operator=(Define&& other) = default;
Instruction* m_Inst; // Instruction that writes to the temp
Operand* m_Op; // The (destination) operand within that instruction.

View File

@ -15,9 +15,9 @@
struct ConstantArrayChunk
{
ConstantArrayChunk() : m_Size(0), m_AccessMask(0) {}
ConstantArrayChunk() : m_Size(0), m_AccessMask(0), m_Rebase(0), m_ComponentCount(0) {}
ConstantArrayChunk(uint32_t sz, uint32_t mask, Operand *firstUse)
: m_Size(sz), m_AccessMask(mask)
: m_Size(sz), m_AccessMask(mask), m_Rebase(0), m_ComponentCount(0)
{
m_UseSites.push_back(firstUse);
}
@ -63,7 +63,7 @@ public:
m_NextTexCoordTemp(0)
{}
void ResolveUAVProperties();
void ResolveUAVProperties(const ShaderInfo& sInfo);
void UnvectorizeImmMoves(); // Transform MOV tX.xyz, (0, 1, 2) into MOV tX.x, 0; MOV tX.y, 1; MOV tX.z, 2 to make datatype analysis easier
@ -103,9 +103,6 @@ public:
uint32_t m_NextFreeTempRegister; // A counter for creating new temporaries for for-loops.
uint32_t m_NextTexCoordTemp; // A counter for creating tex coord temps for driver issue workarounds
// Instructions that are static branches (branches based on constant buffer values only)
std::vector<Instruction *> m_StaticBranchInstructions;
private:
bool m_CFGInitialized;
HLSLcc::ControlFlow::ControlFlowGraph m_CFG;
@ -143,9 +140,8 @@ public:
aiOpcodeUsed(NUM_OPCODES, 0),
ui32CurrentVertexOutputStream(0),
textureSamplers(),
aui32StructuredBufferBindingPoints(MAX_RESOURCE_BINDINGS, 0),
ui32CurrentStructuredBufferIndex(),
m_DummySamplerDeclared(false)
m_DummySamplerDeclared(false),
maxSemanticIndex(0)
{
}
@ -157,14 +153,6 @@ public:
//Convert from per-phase temps to global temps.
void ConsolidateHullTempVars();
// Go through all declarations and remove UAV occupied binding points from the aui32StructuredBufferBindingPoints list
void ResolveStructuredBufferBindingSlots(ShaderPhase *psPhase);
// HLSL has separate register spaces for UAV and structured buffers. GLSL has shared register space for all buffers.
// The aim here is to preserve the UAV buffer bindings as they are and use remaining binding points for structured buffers.
// In this step make aui32StructuredBufferBindingPoints contain increasingly ordered uints starting from zero.
void PrepareStructuredBufferBindingSlots();
// Detect temp registers per data type that are actually used.
void PruneTempRegisters();
@ -172,6 +160,9 @@ public:
// as in, 2x texcoord vec2's are packed together as vec4 but still accessed together.
void AnalyzeIOOverlap();
// Compute maxSemanticIndex based on the results of AnalyzeIOOverlap
void SetMaxSemanticIndex();
// Change all references to vertex position to always be highp; having them be mediump causes problems on Metal and Vivante GPUs.
void ForcePositionToHighp();
@ -245,9 +236,6 @@ public:
TextureSamplerPairs textureSamplers;
std::vector<uint32_t> aui32StructuredBufferBindingPoints;
uint32_t ui32CurrentStructuredBufferIndex;
std::vector<char> psIntTempSizes; // Array for whether this temp register needs declaration as int temp
std::vector<char> psInt16TempSizes; // min16ints
std::vector<char> psInt12TempSizes; // min12ints
@ -260,6 +248,7 @@ public:
std::vector<char> psBoolTempSizes; // ... and for bools
bool m_DummySamplerDeclared; // If true, the shader doesn't declare any samplers but uses texelFetch and we have added a dummy sampler for Vulkan for that.
uint32_t maxSemanticIndex; // Highest semantic index found by SignatureAnalysis
private:
void DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand);

View File

@ -43,6 +43,14 @@ static int HaveOverloadedTextureFuncs(const GLLang eLang)
return 1;
}
static bool IsMobileTarget(const HLSLCrossCompilerContext *psContext)
{
if ((psContext->flags & HLSLCC_FLAG_MOBILE_TARGET) != 0)
return true;
return false;
}
//Only enable for ES, Vulkan and Switch.
//Not present in 120, ignored in other desktop languages. Specifically enabled on Vulkan.
static int HavePrecisionQualifiers(const HLSLCrossCompilerContext *psContext)
@ -308,4 +316,13 @@ static int HaveImageLoadStore(const GLLang eLang)
return 0;
}
static int HavePreciseQualifier(const GLLang eLang)
{
if (eLang >= LANG_400) // TODO: Add for ES when we're adding 3.2 lang
{
return 1;
}
return 0;
}
#endif

View File

@ -10,9 +10,16 @@ class ToGLSL : public Translator
protected:
GLLang language;
bool m_NeedUnityInstancingArraySizeDecl;
bool m_NeedUnityPreTransformDecl;
public:
explicit ToGLSL(HLSLCrossCompilerContext *ctx) : Translator(ctx), language(LANG_DEFAULT), m_NeedUnityInstancingArraySizeDecl(false), m_NumDeclaredWhileTrueLoops(0) {}
explicit ToGLSL(HLSLCrossCompilerContext* ctx) :
Translator(ctx),
language(LANG_DEFAULT),
m_NeedUnityInstancingArraySizeDecl(false),
m_NeedUnityPreTransformDecl(false),
m_NumDeclaredWhileTrueLoops(0)
{}
// Sets the target language according to the given input. If LANG_DEFAULT, autodetects and returns the selected language
GLLang SetLanguage(GLLang suggestedLanguage);
@ -22,27 +29,20 @@ public:
virtual void SetIOPrefixes();
private:
// Vulkan-only: detect which branches only depend on uniforms and immediate values and can be turned into specialization constants.
void IdentifyStaticBranches(ShaderPhase *psPhase);
// May return false when we detect too complex stuff (matrices, arrays etc)
bool BuildStaticBranchNameForInstruction(Instruction &inst);
void DeclareSpecializationConstants(ShaderPhase &phase);
void TranslateOperand(bstring glsl, const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL);
void TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL);
void TranslateOperand(bstring glsl, const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL, bool forceNoConversion = false);
void TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL, bool forceNoConversion = false);
void TranslateInstruction(Instruction* psInst, bool isEmbedded = false);
void TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase);
void TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase);
void TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion = false);
void TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion = false);
void TranslateOperandIndex(const Operand* psOperand, int index);
void TranslateOperandIndexMAD(const Operand* psOperand, int index, uint32_t multiply, uint32_t add);
void AddOpAssignToDestWithMask(const Operand* psDest,
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask);
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int *pNeedsParenthesis, uint32_t ui32CompMask);
void AddAssignToDest(const Operand* psDest,
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis);
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int* pNeedsParenthesis);
void AddAssignPrologue(int numParenthesis, bool isEmbedded = false);
@ -53,10 +53,12 @@ private:
void AddUserOutput(const Declaration* psDecl);
void DeclareStructConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, const Operand* psOperand, bstring glsl);
void DeclareConstBufferShaderVariable(const char* varName, const struct ShaderVarType* psType, const struct ConstantBuffer* psCBuf, int unsizedArray, bool addUniformPrefix = false);
void DeclareConstBufferShaderVariable(const char* varName, const struct ShaderVarType* psType, const struct ConstantBuffer* psCBuf, int unsizedArray, bool addUniformPrefix, bool reportInReflection);
void PreDeclareStructType(const std::string &name, const struct ShaderVarType* psType);
void DeclareUBOConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, bstring glsl);
void ReportStruct(const std::string &name, const struct ShaderVarType* psType);
typedef enum
{
CMP_EQ,
@ -68,8 +70,8 @@ private:
void AddComparison(Instruction* psInst, ComparisonType eType,
uint32_t typeFlag);
void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, bool isEmbedded = false);
void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2);
void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, uint32_t precise, bool isEmbedded = false);
void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2, uint32_t precise);
void CallBinaryOp(const char* name, Instruction* psInst,
int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType, bool isEmbedded = false);
void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst,
@ -109,6 +111,8 @@ private:
Instruction* psInst,
bstring glsl);
void HandleSwitchTransformation(Instruction* psInst, bstring glsl);
// Add an extra function to the m_FunctionDefinitions list, unless it's already there.
bool DeclareExtraFunction(const std::string &name, bstring body);
void UseExtraFunctionDependency(const std::string &name);
@ -124,6 +128,117 @@ private:
FunctionDefinitions m_FunctionDefinitions;
std::vector<std::string> m_FunctionDefinitionsOrder;
std::vector<std::string> m_AdditionalDefinitions;
std::vector<std::string> m_DefinedStructs;
std::set<uint32_t> m_DeclaredRenderTarget;
int m_NumDeclaredWhileTrueLoops;
struct SwitchConversion
{
/*
IF (CONDITION1) BREAK; STATEMENT1; IF (CONDITION2) BREAK; STATEMENT2;... transforms to
if (CONDITION1) {} ELSE { STATEMENT1; IF (CONDITION2) {} ELSE {STATEMENT2; ...} }
thus, we need to count the "BREAK" statements we encountered in each IF on the same level inside a SWITCH.
(A standalone sketch of this rewrite follows after this class declaration.)
*/
struct ConditionalInfo
{
int breakCount; // Count BREAK on the same level to emit enough closing braces afterwards
bool breakEncountered; // Just encountered a BREAK statement, potentially need to emit "ELSE"
bool endifEncountered; // We need to check for "ENDIF ELSE" sequence, and not emit "else" if we see it
ConditionalInfo() :
ConditionalInfo(0, false)
{}
explicit ConditionalInfo(int initialBreakCount) :
ConditionalInfo(initialBreakCount, false)
{}
ConditionalInfo(int initialBreakCount, bool withEndif) :
ConditionalInfo(initialBreakCount, withEndif, false)
{}
ConditionalInfo(int initialBreakCount, bool withEndif, bool withBreak) :
breakCount(initialBreakCount),
endifEncountered(withEndif),
breakEncountered(withBreak)
{}
};
bstring switchOperand;
// We defer emitting if (condition) for each CASE statement to concatenate possible CASE A: CASE B:... into one if ().
std::vector<bstring> currentCaseOperands;
std::vector<ConditionalInfo> conditionalsInfo;
int isInLoop; // We don't count "BREAK" (nor emit them) if we're in a loop.
bool isFirstCase;
SwitchConversion() :
switchOperand(bfromcstr("")),
isInLoop(0),
isFirstCase(true)
{}
SwitchConversion(const SwitchConversion& other) :
switchOperand(bstrcpy(other.switchOperand)),
conditionalsInfo(other.conditionalsInfo),
isInLoop(other.isInLoop),
isFirstCase(other.isFirstCase)
{
currentCaseOperands.reserve(other.currentCaseOperands.size());
for (size_t i = 0; i < other.currentCaseOperands.size(); ++i)
currentCaseOperands.push_back(bstrcpy(other.currentCaseOperands[i]));
}
SwitchConversion(SwitchConversion&& other) :
switchOperand(other.switchOperand),
currentCaseOperands(std::move(other.currentCaseOperands)),
conditionalsInfo(std::move(other.conditionalsInfo)),
isInLoop(other.isInLoop),
isFirstCase(other.isFirstCase)
{
other.switchOperand = nullptr;
}
~SwitchConversion()
{
bdestroy(switchOperand);
for (size_t i = 0; i < currentCaseOperands.size(); ++i)
bdestroy(currentCaseOperands[i]);
}
SwitchConversion& operator=(const SwitchConversion& other)
{
if (this == &other)
return *this;
switchOperand = bstrcpy(other.switchOperand);
conditionalsInfo = other.conditionalsInfo;
isInLoop = other.isInLoop;
isFirstCase = other.isFirstCase;
currentCaseOperands.reserve(other.currentCaseOperands.size());
for (size_t i = 0; i < other.currentCaseOperands.size(); ++i)
currentCaseOperands.push_back(bstrcpy(other.currentCaseOperands[i]));
return *this;
}
SwitchConversion& operator=(SwitchConversion&& other)
{
if (this == &other)
return *this;
switchOperand = other.switchOperand;
conditionalsInfo = std::move(other.conditionalsInfo);
isInLoop = other.isInLoop;
isFirstCase = other.isFirstCase;
currentCaseOperands = std::move(other.currentCaseOperands);
other.switchOperand = nullptr;
return *this;
}
};
std::vector<SwitchConversion> m_SwitchStack;
};
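The comment inside SwitchConversion describes the rewrite only in the abstract; below is a minimal standalone sketch of it (illustrative only, plain std::string emission instead of the translator's bstring path, with made-up conditions and statements):
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

// Sketch of the SWITCH-to-IF rewrite: each "if (COND) BREAK; STMT;" pair becomes
// "if (COND) {} else { STMT; ... }", and the deferred '}' count plays the role of
// ConditionalInfo::breakCount.
static std::string RewriteBreaksToElseNests(const std::vector<std::pair<std::string, std::string>>& steps)
{
    std::string out;
    int openBraces = 0;
    for (const auto& step : steps)
    {
        out += "if (" + step.first + ") {} else { " + step.second + "; ";
        ++openBraces; // remember how many closing braces to emit at the end
    }
    for (int i = 0; i < openBraces; ++i)
        out += "} ";
    return out;
}

int main()
{
    std::string glsl = RewriteBreaksToElseNests({ { "x == 0", "y = 1" }, { "x == 1", "y = 2" } });
    std::puts(glsl.c_str()); // roughly: if (x == 0) {} else { y = 1; if (x == 1) {} else { y = 2; } }
    return 0;
}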


@ -20,8 +20,6 @@ struct TextureSamplerDesc
class ToMetal : public Translator
{
protected:
GLLang language;
public:
explicit ToMetal(HLSLCrossCompilerContext *ctx)
: Translator(ctx)
@ -78,9 +76,9 @@ private:
// ToMetalInstruction.cpp
void AddOpAssignToDestWithMask(const Operand* psDest,
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask);
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int& numParenthesis, uint32_t ui32CompMask);
void AddAssignToDest(const Operand* psDest,
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis);
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int& numParenthesis);
void AddAssignPrologue(int numParenthesis);
typedef enum
@ -96,8 +94,8 @@ private:
bool CanForceToHalfOperand(const Operand *psOperand);
void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc);
void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2);
void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, uint32_t precise);
void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2, uint32_t precise);
void CallBinaryOp(const char* name, Instruction* psInst,
int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType);
void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst,
@ -152,6 +150,14 @@ private:
BindingSlotAllocator m_TextureSlots, m_SamplerSlots;
BindingSlotAllocator m_BufferSlots;
struct BufferReflection
{
uint32_t bind;
bool isUAV;
bool hasCounter;
};
std::map<std::string, BufferReflection> m_BufferReflections;
std::vector<SamplerDesc> m_Samplers;
std::vector<TextureSamplerDesc> m_Textures;


@ -714,6 +714,11 @@ static uint32_t DecodeInstructionSaturate(uint32_t ui32Token)
return (ui32Token & 0x00002000) ? 1 : 0;
}
static uint32_t DecodeInstructionPreciseMask(uint32_t ui32Token) // "precise" keyword
{
return (uint32_t)((ui32Token & 0x00780000) >> 19);
}
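For reference, a small worked example of the mask extraction above (not part of this commit; it assumes the four bits at positions 19..22 carry one "precise" flag per destination component):
#include <cstdint>
#include <cstdio>

// Mirrors DecodeInstructionPreciseMask: pull bits 19..22 out of the opcode token.
static uint32_t DecodePreciseMask(uint32_t token)
{
    return (token & 0x00780000u) >> 19;
}

int main()
{
    uint32_t token = 0u;
    token |= (0xFu << 19);                         // mark all four destination components as precise
    std::printf("%X\n", DecodePreciseMask(token)); // prints F
    return 0;
}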
typedef enum OPERAND_MIN_PRECISION
{
OPERAND_MIN_PRECISION_DEFAULT = 0, // Default precision


@ -430,10 +430,19 @@ static void ReadResources(const uint32_t* pui32Tokens,//in
pui32ConstantBuffers = ReadConstantBuffer(psShaderInfo, pui32FirstToken, pui32ConstantBuffers, psConstantBuffers + i);
}
//Map resource bindings to constant buffers
if (psShaderInfo->psConstantBuffers.size())
{
/* HLSL allows the following:
cbuffer A
{...}
cbuffer A
{...}
And both will be present in the assembly if used
So we need to track which ones we matched already and throw an error if two buffers have the same name
*/
std::vector<uint32_t> alreadyBound(ui32NumConstantBuffers, 0);
for (i = 0; i < ui32NumResourceBindings; ++i)
{
ResourceGroup eRGroup;
@ -444,9 +453,11 @@ static void ReadResources(const uint32_t* pui32Tokens,//in
//Find the constant buffer whose name matches the resource at the given resource binding point
for (cbufIndex = 0; cbufIndex < psShaderInfo->psConstantBuffers.size(); cbufIndex++)
{
if (psConstantBuffers[cbufIndex].name == psResBindings[i].name)
if (psConstantBuffers[cbufIndex].name == psResBindings[i].name && alreadyBound[cbufIndex] == 0)
{
psShaderInfo->aui32ResourceMap[eRGroup][psResBindings[i].ui32BindPoint] = cbufIndex;
alreadyBound[cbufIndex] = 1;
break;
}
}
}
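A standalone sketch of the first-unmatched-wins matching implemented by the alreadyBound bookkeeping above (illustrative only; plain strings stand in for the real ShaderInfo structures):
#include <cstdio>
#include <string>
#include <vector>

// Map each resource binding to the first constant buffer with a matching name
// that has not been claimed yet, so duplicate cbuffer names get distinct indices.
int main()
{
    std::vector<std::string> cbufNames = { "A", "A", "B" };    // duplicate names are legal HLSL
    std::vector<std::string> bindingNames = { "A", "A", "B" };
    std::vector<int> alreadyBound(cbufNames.size(), 0);

    for (size_t bind = 0; bind < bindingNames.size(); ++bind)
    {
        for (size_t cb = 0; cb < cbufNames.size(); ++cb)
        {
            if (cbufNames[cb] == bindingNames[bind] && alreadyBound[cb] == 0)
            {
                std::printf("binding %zu -> cbuffer %zu\n", bind, cb); // 0->0, 1->1, 2->2
                alreadyBound[cb] = 1;
                break;
            }
        }
    }
    return 0;
}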


@ -497,6 +497,11 @@ static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext)
inputName = oss.str();
}
if (psContext->psDependencies->IsHullShaderInputAlreadyDeclared(inputName))
continue;
psContext->psDependencies->RecordHullShaderInput(inputName);
std::string outputName;
{
std::ostringstream oss;
@ -513,8 +518,9 @@ static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext)
prec = "highp ";
}
int inLoc = psContext->psDependencies->GetVaryingLocation(inputName, HULL_SHADER, true);
int outLoc = psContext->psDependencies->GetVaryingLocation(outputName, HULL_SHADER, false);
bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0);
int inLoc = psContext->psDependencies->GetVaryingLocation(inputName, HULL_SHADER, true, keepLocation, psContext->psShader->maxSemanticIndex);
int outLoc = psContext->psDependencies->GetVaryingLocation(outputName, HULL_SHADER, false, keepLocation, psContext->psShader->maxSemanticIndex);
psContext->AddIndentation();
if (ui32NumComponents > 1)
@ -562,6 +568,29 @@ GLLang ToGLSL::SetLanguage(GLLang suggestedLanguage)
return language;
}
// Go through all declarations and reserve the binding points occupied by UAVs
void ResolveStructuredBufferBindingSlots(ShaderPhase *psPhase, HLSLCrossCompilerContext *psContext, GLSLCrossDependencyData *glslDependencyData)
{
for (uint32_t p = 0; p < psPhase->psDecl.size(); ++p)
{
if (psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW ||
psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED)
{
uint32_t uav = psPhase->psDecl[p].asOperands[0].ui32RegisterNumber; // uav binding point
bstring BufNamebstr = bfromcstr("");
ResourceName(BufNamebstr, psContext, RGROUP_UAV, psPhase->psDecl[p].asOperands[0].ui32RegisterNumber, 0);
char *btmp = bstr2cstr(BufNamebstr, '\0');
std::string BufName = btmp;
bcstrfree(btmp);
bdestroy(BufNamebstr);
glslDependencyData->ReserveNamedBindPoint(BufName, uav, GLSLCrossDependencyData::BufferType_ReadWrite);
}
}
}
bool ToGLSL::Translate()
{
bstring glsl;
@ -578,6 +607,8 @@ bool ToGLSL::Translate()
psShader->ExpandSWAPCs();
psShader->ForcePositionToHighp();
psShader->AnalyzeIOOverlap();
if ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0)
psShader->SetMaxSemanticIndex();
psShader->FindUnusedGlobals(psContext->flags);
psContext->indent = 0;
@ -633,18 +664,44 @@ bool ToGLSL::Translate()
bcatcstr(glsl, "#endif\n");
}
psShader->PrepareStructuredBufferBindingSlots();
if (psContext->psShader->eTargetLanguage != LANG_ES_100)
{
bool hasConstantBuffers = psContext->psShader->sInfo.psConstantBuffers.size() > 0;
if (hasConstantBuffers)
{
// This value will be replaced at runtime with 0 if we need to disable UBO.
bcatcstr(glsl, "#define HLSLCC_ENABLE_UNIFORM_BUFFERS 1\n");
bcatcstr(glsl, "#if HLSLCC_ENABLE_UNIFORM_BUFFERS\n#define UNITY_UNIFORM\n#else\n#define UNITY_UNIFORM uniform\n#endif\n");
}
bool hasTextures = false;
for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i)
{
if (psShader->asPhases[0].psDecl[i].eOpcode == OPCODE_DCL_RESOURCE)
{
hasTextures = true;
break;
}
}
if (hasTextures || hasConstantBuffers)
{
// This value will be replaced at runtime with 0 if we need to disable explicit uniform locations.
bcatcstr(glsl, "#define UNITY_SUPPORTS_UNIFORM_LOCATION 1\n");
bcatcstr(glsl, "#if UNITY_SUPPORTS_UNIFORM_LOCATION\n#define UNITY_LOCATION(x) layout(location = x)\n#define UNITY_BINDING(x) layout(binding = x, std140)\n#else\n#define UNITY_LOCATION(x)\n#define UNITY_BINDING(x) layout(std140)\n#endif\n");
}
}
for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++)
{
ShaderPhase &phase = psShader->asPhases[ui32Phase];
phase.UnvectorizeImmMoves();
psContext->DoDataTypeAnalysis(&phase);
phase.ResolveUAVProperties();
psShader->ResolveStructuredBufferBindingSlots(&phase);
phase.ResolveUAVProperties(psShader->sInfo);
ResolveStructuredBufferBindingSlots(&phase, psContext, psContext->psDependencies);
if (!psContext->IsVulkan() && !psContext->IsSwitch())
{
phase.PruneConstArrays();
psContext->ReserveFramebufferFetchInputs();
}
}
psShader->PruneTempRegisters();
@ -654,11 +711,6 @@ bool ToGLSL::Translate()
// Loop transform can only be done after the temps have been pruned
ShaderPhase &phase = psShader->asPhases[ui32Phase];
HLSLcc::DoLoopTransform(psContext, phase);
if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0)
{
IdentifyStaticBranches(&phase);
}
}
//Special case. Can have multiple phases.
@ -694,20 +746,16 @@ bool ToGLSL::Translate()
ShaderPhase *psPhase = &psShader->asPhases[ui32Phase];
psContext->currentPhase = ui32Phase;
#ifdef _DEBUG
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
{
bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase));
#endif
}
for (i = 0; i < psPhase->psDecl.size(); ++i)
{
TranslateDeclaration(&psPhase->psDecl[i]);
}
if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0)
{
DeclareSpecializationConstants(*psPhase);
}
bformata(glsl, "void %s%d(int phaseInstanceID)\n{\n", GetPhaseFuncName(psPhase->ePhase), ui32Phase);
psContext->indent++;
@ -758,15 +806,19 @@ bool ToGLSL::Translate()
if (psPhase->earlyMain->slen > 1)
{
#ifdef _DEBUG
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
{
psContext->AddIndentation();
bcatcstr(glsl, "//--- Start Early Main ---\n");
#endif
}
bconcat(glsl, psPhase->earlyMain);
#ifdef _DEBUG
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
{
psContext->AddIndentation();
bcatcstr(glsl, "//--- End Early Main ---\n");
#endif
}
}
for (i = 0; i < psPhase->ui32InstanceCount; i++)
@ -777,15 +829,19 @@ bool ToGLSL::Translate()
if (psPhase->hasPostShaderCode)
{
#ifdef _DEBUG
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
{
psContext->AddIndentation();
bcatcstr(glsl, "//--- Post shader code ---\n");
#endif
}
bconcat(glsl, psPhase->postShaderCode);
#ifdef _DEBUG
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
{
psContext->AddIndentation();
bcatcstr(glsl, "//--- End post shader code ---\n");
#endif
}
}
@ -834,6 +890,13 @@ bool ToGLSL::Translate()
bcatcstr(extensions, "#ifndef " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "\n\t#define " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO " 2\n#endif\n");
}
}
if (m_NeedUnityPreTransformDecl)
{
if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS)
{
bformata(extensions, "layout(constant_id = %d) const int %s = 0;\n", kPreTransformConstantID, UNITY_PRETRANSFORM_CONSTANT_NAME);
}
}
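As a hedged, app-side sketch (not part of this commit; requires the Vulkan headers): the constant_id emitted above is what the runtime overrides at pipeline creation time through VkSpecializationInfo. The id below is a placeholder and has to match kPreTransformConstantID.
#include <vulkan/vulkan.h>
#include <cstdint>

// Fill a VkSpecializationInfo that overrides the pre-transform constant declared above.
// The resulting struct is passed via VkPipelineShaderStageCreateInfo::pSpecializationInfo.
void FillPreTransformSpecialization(int32_t* storage, int32_t preTransform,
                                    VkSpecializationMapEntry* entry, VkSpecializationInfo* info)
{
    const uint32_t kPreTransformId = 1; // placeholder; keep in sync with kPreTransformConstantID
    *storage = preTransform;

    entry->constantID = kPreTransformId;
    entry->offset = 0;
    entry->size = sizeof(int32_t);

    info->mapEntryCount = 1;
    info->pMapEntries = entry;
    info->dataSize = sizeof(int32_t);
    info->pData = storage;
}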
bconcat(extensions, glsl);
bdestroy(glsl);
@ -912,11 +975,6 @@ bool ToGLSL::Translate()
TranslateDeclaration(&psShader->asPhases[0].psDecl[i]);
}
if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0)
{
DeclareSpecializationConstants(psShader->asPhases[0]);
}
// Search and replace string, for injecting generated functions that need to be after default precision declarations
bconcat(glsl, generatedFunctionsKeyword);
@ -932,15 +990,19 @@ bool ToGLSL::Translate()
if (psContext->psShader->asPhases[0].earlyMain->slen > 1)
{
#ifdef _DEBUG
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
{
psContext->AddIndentation();
bcatcstr(glsl, "//--- Start Early Main ---\n");
#endif
}
bconcat(glsl, psContext->psShader->asPhases[0].earlyMain);
#ifdef _DEBUG
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
{
psContext->AddIndentation();
bcatcstr(glsl, "//--- End Early Main ---\n");
#endif
}
}
for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i)
@ -952,18 +1014,26 @@ bool ToGLSL::Translate()
bcatcstr(glsl, "}\n");
// Print out extra functions we generated in generation order to satisfy dependencies
// Print out extra definitions and functions we generated in generation order to satisfy dependencies
{
bstring generatedFunctions = bfromcstr("");
bstring generatedFunctionsAndDefinitions = bfromcstr("");
for (size_t i = 0; i < m_AdditionalDefinitions.size(); ++i)
{
bcatcstr(generatedFunctionsAndDefinitions, m_AdditionalDefinitions[i].c_str());
bcatcstr(generatedFunctionsAndDefinitions, "\n");
}
for (std::vector<std::string>::const_iterator funcNameIter = m_FunctionDefinitionsOrder.begin(); funcNameIter != m_FunctionDefinitionsOrder.end(); ++funcNameIter)
{
const FunctionDefinitions::const_iterator definition = m_FunctionDefinitions.find(*funcNameIter);
ASSERT(definition != m_FunctionDefinitions.end());
bcatcstr(generatedFunctions, definition->second.c_str());
bcatcstr(generatedFunctions, "\n");
bcatcstr(generatedFunctionsAndDefinitions, definition->second.c_str());
bcatcstr(generatedFunctionsAndDefinitions, "\n");
}
bfindreplace(glsl, generatedFunctionsKeyword, generatedFunctions, 0);
bdestroy(generatedFunctions);
bfindreplace(glsl, generatedFunctionsKeyword, generatedFunctionsAndDefinitions, 0);
bdestroy(generatedFunctionsAndDefinitions);
bdestroy(generatedFunctionsKeyword);
}
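A minimal sketch of the placeholder mechanism used here, with std::string standing in for bstring (illustrative only): a unique keyword is written into the output early, and once every extra definition and generated function is known it is swapped for their concatenation.
#include <cstdio>
#include <string>
#include <vector>

int main()
{
    const std::string keyword = "$GENERATED_FUNCTIONS$"; // stand-in for generatedFunctionsKeyword
    std::string glsl = "precision highp float;\n" + keyword + "\nvoid main() { color = xlat_helper(); }\n";

    std::vector<std::string> definitions = { "vec4 null;", "vec4 xlat_helper() { return null; }" };
    std::string block;
    for (const std::string& d : definitions)
        block += d + "\n";

    glsl.replace(glsl.find(keyword), keyword.size(), block); // same role as bfindreplace above
    std::puts(glsl.c_str());
    return 0;
}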
// Concat extensions and glsl for the final shader code.
@ -978,6 +1048,13 @@ bool ToGLSL::Translate()
bcatcstr(extensions, "#ifndef " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "\n\t#define " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO " 2\n#endif\n");
}
}
if (m_NeedUnityPreTransformDecl)
{
if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS)
{
bformata(extensions, "layout(constant_id = %d) const int %s = 0;\n", kPreTransformConstantID, UNITY_PRETRANSFORM_CONSTANT_NAME);
}
}
bconcat(extensions, glsl);
bdestroy(glsl);
@ -1111,233 +1188,3 @@ void ToGLSL::UseExtraFunctionDependency(const std::string &name)
bdestroy(code);
}
void ToGLSL::DeclareSpecializationConstants(ShaderPhase &phase)
{
bstring glsl = psContext->glsl;
// There may be several uses for the same branch condition, so we'll need to keep track of what we've already declared.
std::set<uint32_t> alreadyDeclared;
for (std::vector<Instruction *>::iterator itr = phase.m_StaticBranchInstructions.begin(); itr != phase.m_StaticBranchInstructions.end(); itr++)
{
Instruction &i = **itr;
uint32_t slot = psContext->psDependencies->GetSpecializationConstantSlot(i.m_StaticBranchName);
if (alreadyDeclared.insert(slot).second) // Only declare if the insertion actually succeeded
bformata(glsl, "layout(constant_id = %d) const bool %s = false;\n", slot, i.m_StaticBranchName.c_str());
}
}
std::string to64(uint32_t in)
{
const char to64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
char c_[2];
c_[0] = to64[in];
c_[1] = 0;
char c = c_[0];
if (c == 'X')
return "XX";
if (c == '+')
return "XA";
if (c == '/')
return "XB";
return std::string(c_);
}
// Slightly custom base64, espace non-identifier chars with 'X'
static void Base64Encode(const std::string &in, std::string& result)
{
size_t len = in.length();
size_t outputLen = (len + 2) / 3 * 4;
unsigned char *bytes = (unsigned char *)&in[0];
result.clear();
result.reserve(outputLen);
int i = 0;
unsigned char b1, b2, b3;
for (int chunk = 0; len > 0; len -= 3, chunk++)
{
b1 = bytes[i++];
b2 = len > 1 ? bytes[i++] : '\0';
result += to64(b1 >> 2);
result += to64(((b1 & 3) << 4) | ((b2 & 0xf0) >> 4));
if (len > 2)
{
b3 = bytes[i++];
result += to64(((b2 & 0xF) << 2) | ((b3 & 0xC0) >> 6));
result += to64(b3 & 0x3F);
}
else if (len == 2)
{
result += to64((b2 & 0xF) << 2);
result += "XC";
break;
}
else /* len == 1 */
{
result += "XC";
break;
}
}
}
bool ToGLSL::BuildStaticBranchNameForInstruction(Instruction &inst)
{
std::ostringstream oss;
if (!inst.m_StaticBranchCondition)
{
// Simple case, just get the value, check if nonzero
bstring varname = bfromcstr("");
SHADER_VARIABLE_TYPE argType = inst.asOperands[0].GetDataType(psContext);
uint32_t flag = TO_FLAG_NONE;
switch (argType)
{
case SVT_BOOL:
flag = TO_FLAG_BOOL;
break;
case SVT_INT:
case SVT_INT12:
case SVT_INT16:
flag = TO_FLAG_INTEGER;
break;
case SVT_UINT:
case SVT_UINT16:
case SVT_UINT8:
flag = TO_FLAG_UNSIGNED_INTEGER;
break;
default:
break;
}
TranslateOperand(varname, &inst.asOperands[0], flag);
char *str = bstr2cstr(varname, '\0');
oss << str;
bcstrfree(str);
bdestroy(varname);
oss << "!=0";
std::string res = oss.str();
// Sanity checks: no arrays, no matrices
if (res.find('[') != std::string::npos)
return false;
if (res.find("hlslcc_mtx") != std::string::npos)
return false;
Base64Encode(res, inst.m_StaticBranchName);
}
else
{
// Indirect, just store the whole previous instruction and then the condition
bstring res = bfromcstr("");
bstring *oldglsl = psContext->currentGLSLString;
psContext->currentGLSLString = &res;
TranslateInstruction((Instruction *)inst.m_StaticBranchCondition, true);
psContext->currentGLSLString = oldglsl;
SHADER_VARIABLE_TYPE argType = inst.asOperands[0].GetDataType(psContext);
uint32_t flag = TO_FLAG_NONE;
switch (argType)
{
case SVT_BOOL:
flag = TO_FLAG_BOOL;
break;
case SVT_INT:
case SVT_INT12:
case SVT_INT16:
flag = TO_FLAG_INTEGER;
break;
case SVT_UINT:
case SVT_UINT16:
case SVT_UINT8:
flag = TO_FLAG_UNSIGNED_INTEGER;
break;
default:
break;
}
if (argType == SVT_BOOL)
{
if (inst.eBooleanTestType == INSTRUCTION_TEST_ZERO)
bcatcstr(res, "!");
}
TranslateOperand(res, &inst.asOperands[0], flag);
char *str = bstr2cstr(res, '\0');
oss << str;
bcstrfree(str);
bdestroy(res);
if (argType != SVT_BOOL)
oss << "!=0";
std::string ress = oss.str();
// Sanity checks: no arrays, no matrices
if (ress.find('[') != std::string::npos)
return false;
if (ress.find("hlslcc_mtx") != std::string::npos)
return false;
Base64Encode(ress, inst.m_StaticBranchName);
}
return true;
}
void ToGLSL::IdentifyStaticBranches(ShaderPhase *psPhase)
{
for (std::vector<Instruction>::iterator itr = psPhase->psInst.begin(); itr != psPhase->psInst.end(); itr++)
{
Instruction &i = *itr;
if (!i.IsConditionalBranchInstruction())
continue;
// Simple case, direct conditional branch
if (i.asOperands[0].eType == OPERAND_TYPE_CONSTANT_BUFFER)
{
i.m_StaticBranchCondition = NULL;
if (BuildStaticBranchNameForInstruction(i))
{
psPhase->m_StaticBranchInstructions.push_back(&i);
i.m_IsStaticBranch = true;
}
}
// Indirect, comparison via another instruction
if (i.asOperands[0].eType == OPERAND_TYPE_TEMP)
{
// Check that the temp only has one visible definition
if (i.asOperands[0].m_Defines.size() == 1)
{
// ...and that it only uses constant buffers and immediates
Instruction &def = *i.asOperands[0].m_Defines[0].m_Inst;
bool isStatic = true;
for (uint32_t k = def.ui32FirstSrc; k < def.ui32NumOperands; k++)
{
Operand &o = def.asOperands[k];
if (!(o.eType == OPERAND_TYPE_CONSTANT_BUFFER || o.eType == OPERAND_TYPE_IMMEDIATE32))
{
isStatic = false;
break;
}
// Also check that the constant buffer access is "simple"
if (o.eType == OPERAND_TYPE_CONSTANT_BUFFER)
{
if (o.m_SubOperands[0].get() || o.m_SubOperands[1].get())
{
isStatic = false;
break;
}
}
}
if (isStatic)
{
i.m_StaticBranchCondition = &def;
if (BuildStaticBranchNameForInstruction(i))
{
psPhase->m_StaticBranchInstructions.push_back(&i);
i.m_IsStaticBranch = true;
}
else
i.m_StaticBranchCondition = NULL;
}
}
}
}
}

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -324,10 +324,11 @@ void ToGLSL::TranslateOperandIndexMAD(const Operand* psOperand, int index, uint3
}
}
static std::string GetBitcastOp(HLSLCrossCompilerContext *psContext, SHADER_VARIABLE_TYPE from, SHADER_VARIABLE_TYPE to, uint32_t numComponents)
static std::string GetBitcastOp(HLSLCrossCompilerContext *psContext, SHADER_VARIABLE_TYPE from, SHADER_VARIABLE_TYPE to, uint32_t numComponents, bool &needsBitcastOp)
{
if (psContext->psShader->eTargetLanguage == LANG_METAL)
{
needsBitcastOp = false;
std::ostringstream oss;
oss << "as_type<";
oss << GetConstructorForTypeMetal(to, numComponents);
@ -336,6 +337,7 @@ static std::string GetBitcastOp(HLSLCrossCompilerContext *psContext, SHADER_VARI
}
else
{
needsBitcastOp = true;
if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_INT)
return "intBitsToFloat";
else if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_UINT)
@ -351,9 +353,8 @@ static std::string GetBitcastOp(HLSLCrossCompilerContext *psContext, SHADER_VARI
}
// Helper function to print out a single 32-bit immediate value in desired format
static void printImmediate32(HLSLCrossCompilerContext *psContext, uint32_t value, SHADER_VARIABLE_TYPE eType)
static void printImmediate32(HLSLCrossCompilerContext *psContext, bstring glsl, uint32_t value, SHADER_VARIABLE_TYPE eType)
{
bstring glsl = *psContext->currentGLSLString;
int needsParenthesis = 0;
// Print floats as bit patterns.
@ -379,12 +380,10 @@ static void printImmediate32(HLSLCrossCompilerContext *psContext, uint32_t value
if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage))
bformata(glsl, "int(0x%Xu)", value);
else
bformata(glsl, "0x%X", value);
}
else if (value <= 1024) // Print anything below 1024 as decimal, and hex after that
bformata(glsl, "%d", value);
}
else
bformata(glsl, "0x%X", value);
bformata(glsl, "%d", value);
break;
case SVT_UINT:
case SVT_UINT16:
@ -409,9 +408,9 @@ static void printImmediate32(HLSLCrossCompilerContext *psContext, uint32_t value
bcatcstr(glsl, ")");
}
void ToGLSL::TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase)
void ToGLSL::TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion)
{
TranslateVariableNameWithMask(*psContext->currentGLSLString, psOperand, ui32TOFlag, pui32IgnoreSwizzle, ui32CompMask, piRebase);
TranslateVariableNameWithMask(*psContext->currentGLSLString, psOperand, ui32TOFlag, pui32IgnoreSwizzle, ui32CompMask, piRebase, forceNoConversion);
}
void ToGLSL::DeclareDynamicIndexWrapper(const struct ShaderVarType* psType)
@ -494,7 +493,7 @@ void ToGLSL::DeclareDynamicIndexWrapper(const char* psName, SHADER_VARIABLE_CLAS
m_FunctionDefinitionsOrder.push_back(psName);
}
void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase)
void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion)
{
int numParenthesis = 0;
int hasCtor = 0;
@ -566,6 +565,8 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
requestedComponents = std::max(requestedComponents, numComponents);
bool needsBitcastOp = false;
if (!(ui32TOFlag & (TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY | TO_FLAG_DECLARATION_NAME)))
{
if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64)
@ -582,7 +583,7 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
if (CanDoDirectCast(psContext, eType, requestedType) || !HaveUnsignedTypes(psContext->psShader->eTargetLanguage))
{
hasCtor = 1;
if (eType == SVT_BOOL)
if (eType == SVT_BOOL && !forceNoConversion)
{
needsBoolUpscale = 1;
// make sure to wrap the whole thing in parens so the upscale
@ -590,13 +591,24 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
bcatcstr(glsl, "(");
numParenthesis++;
}
// case 1154828: for OPERAND_TYPE_INPUT_PRIMITIVEID we end up here with requestedComponents == 0; GetConstructorForType below would then return an empty string and we would miss the cast to uint
if (requestedComponents < 1)
requestedComponents = 1;
bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false));
numParenthesis++;
}
else
{
// Direct cast not possible, need to do bitcast.
bformata(glsl, "%s(", GetBitcastOp(psContext, eType, requestedType, requestedComponents).c_str());
if (IsESLanguage(psContext->psShader->eTargetLanguage) && (requestedType == SVT_UINT))
{
// without explicit cast Adreno may treat the return type of floatBitsToUint as signed int (case 1256567)
bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false));
numParenthesis++;
}
bformata(glsl, "%s(", GetBitcastOp(psContext, eType, requestedType, requestedComponents, /*out*/ needsBitcastOp).c_str());
numParenthesis++;
}
}
@ -619,7 +631,7 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
{
if (psOperand->iNumComponents == 1)
{
printImmediate32(psContext, *((unsigned int*)(&psOperand->afImmediates[0])), requestedType);
printImmediate32(psContext, glsl, *((unsigned int*)(&psOperand->afImmediates[0])), requestedType);
}
else
{
@ -640,7 +652,7 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
if (firstItemAdded)
bcatcstr(glsl, ", ");
uval = *((uint32_t*)(&psOperand->afImmediates[i >= psOperand->iNumComponents ? psOperand->iNumComponents - 1 : i]));
printImmediate32(psContext, uval, requestedType);
printImmediate32(psContext, glsl, uval, requestedType);
firstItemAdded = 1;
}
bcatcstr(glsl, ")");
@ -682,7 +694,8 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
if ((psSig->eSystemValueType == NAME_POSITION && psSig->ui32SemanticIndex == 0) ||
(psSig->semanticName == "POS" && psSig->ui32SemanticIndex == 0) ||
(psSig->semanticName == "SV_POSITION" && psSig->ui32SemanticIndex == 0))
(psSig->semanticName == "SV_POSITION" && psSig->ui32SemanticIndex == 0) ||
(psSig->semanticName == "POSITION" && psSig->ui32SemanticIndex == 0))
{
bcatcstr(glsl, "gl_in");
TranslateOperandIndex(psOperand, 0);//Vertex index
@ -752,7 +765,16 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
{
int stream = 0;
std::string name = psContext->GetDeclaredOutputName(psOperand, &stream, pui32IgnoreSwizzle, piRebase, 0);
// If we are writing out to a built-in type then we need to redirect to the built-in arrays.
// This is safe to do as HLSL enforces a 1:1 mapping, so the output maps to gl_InvocationID by default.
if (name == "gl_Position" && psContext->psShader->eShaderType == HULL_SHADER)
{
bcatcstr(glsl, "gl_out[gl_InvocationID].");
}
bcatcstr(glsl, name.c_str());
if (psOperand->m_SubOperands[0].get())
{
bcatcstr(glsl, "[");
@ -1327,6 +1349,25 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
case OPERAND_TYPE_NULL:
{
// Null register, used to discard results of operations
if (psContext->psShader->eTargetLanguage == LANG_ES_100)
{
// On ES2 we can pass this as an argument to a function, e.g. fake integer operations that we do. See case 1124159.
bcatcstr(glsl, "null");
bool alreadyDeclared = false;
std::string toDeclare = "vec4 null;";
for (size_t i = 0; i < m_AdditionalDefinitions.size(); ++i)
{
if (toDeclare == m_AdditionalDefinitions[i])
{
alreadyDeclared = true;
break;
}
}
if (!alreadyDeclared)
m_AdditionalDefinitions.push_back(toDeclare);
}
else
bcatcstr(glsl, "//null");
break;
}
@ -1564,6 +1605,13 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
*pui32IgnoreSwizzle = 1;
}
if (needsBitcastOp && (*pui32IgnoreSwizzle == 0))
{
// some glsl compilers (Switch's GLSLc) emit warnings "u_xlat.w uninitialized" if generated code looks like: "floatBitsToUint(u_xlat).xz". Instead, generate: "floatBitsToUint(u_xlat.xz)"
TranslateOperandSwizzleWithMask(glsl, psContext, psOperand, ui32CompMask, piRebase ? *piRebase : 0);
*pui32IgnoreSwizzle = 1;
}
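A small string-building illustration of what the needsBitcastOp handling above changes (not the real operand translator): the swizzle gets folded inside the bitcast call, so the generated GLSL reads floatBitsToUint(u_xlat0.xz) instead of floatBitsToUint(u_xlat0).xz.
#include <cstdio>
#include <string>

// Append the swizzle to the argument instead of to the call when a GLSL-style
// bitcast is used, mirroring the needsBitcastOp path above.
static std::string Bitcast(const std::string& op, const std::string& reg, const std::string& swizzle,
                           bool swizzleInsideCall)
{
    if (swizzleInsideCall)
        return op + "(" + reg + swizzle + ")"; // floatBitsToUint(u_xlat0.xz)
    return op + "(" + reg + ")" + swizzle;     // floatBitsToUint(u_xlat0).xz, may trigger GLSLc warnings
}

int main()
{
    std::puts(Bitcast("floatBitsToUint", "u_xlat0", ".xz", true).c_str());
    std::puts(Bitcast("floatBitsToUint", "u_xlat0", ".xz", false).c_str());
    return 0;
}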
if (needsBoolUpscale)
{
if (requestedType == SVT_UINT || requestedType == SVT_UINT16 || requestedType == SVT_UINT8)
@ -1573,7 +1621,7 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage))
bcatcstr(glsl, ") * int(0xffffffffu)");
else
bcatcstr(glsl, ") * int(0xffff)"); // GLSL ES 2 spec: high precision ints are guaranteed to have a range of (-2^16, 2^16)
bcatcstr(glsl, ") * -1"); // GLSL ES 2 spec: high precision ints are guaranteed to have a range of (-2^16, 2^16)
}
numParenthesis--;
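A tiny worked check (host C++, two's complement assumed) of why the "* -1" form emitted above is equivalent to the int(0xffffffffu) mask used on targets with unsigned types:
#include <cstdint>
#include <cstdio>

int main()
{
    bool b = true;
    int32_t viaNegation = int32_t(b) * -1;        // what the GLSL ES 2 path emits
    uint32_t viaMask = uint32_t(b) * 0xffffffffu; // what the uint-capable path emits
    std::printf("%08X %08X\n", uint32_t(viaNegation), viaMask); // FFFFFFFF FFFFFFFF
    return 0;
}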
@ -1588,12 +1636,12 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
}
}
void ToGLSL::TranslateOperand(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask)
void ToGLSL::TranslateOperand(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask, bool forceNoConversion)
{
TranslateOperand(*psContext->currentGLSLString, psOperand, ui32TOFlag, ui32ComponentMask);
TranslateOperand(*psContext->currentGLSLString, psOperand, ui32TOFlag, ui32ComponentMask, forceNoConversion);
}
void ToGLSL::TranslateOperand(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask)
void ToGLSL::TranslateOperand(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask, bool forceNoConversion)
{
uint32_t ui32IgnoreSwizzle = 0;
int iRebase = 0;
@ -1615,7 +1663,7 @@ void ToGLSL::TranslateOperand(bstring glsl, const Operand* psOperand, uint32_t u
if (ui32TOFlag & TO_FLAG_NAME_ONLY)
{
TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, OPERAND_4_COMPONENT_MASK_ALL, &iRebase);
TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, OPERAND_4_COMPONENT_MASK_ALL, &iRebase, forceNoConversion);
return;
}
@ -1642,7 +1690,7 @@ void ToGLSL::TranslateOperand(bstring glsl, const Operand* psOperand, uint32_t u
}
}
TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, ui32ComponentMask, &iRebase);
TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, ui32ComponentMask, &iRebase, forceNoConversion);
if (psContext->psShader->eShaderType == HULL_SHADER && psOperand->eType == OPERAND_TYPE_OUTPUT &&
psOperand->ui32RegisterNumber != 0 && psOperand->iArrayElements != 0 && psOperand->eIndexRep[0] != OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE


@ -68,6 +68,9 @@ static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext)
const ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i];
psContext->AddIndentation();
if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0)
bformata(glsl, "%s%s = %scp[controlPointID].%s;\n", psContext->outputPrefix, "mtl_Position", psContext->inputPrefix, "mtl_Position");
else
bformata(glsl, "%s%s%d = %scp[controlPointID].%s%d;\n", psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex);
}
}
@ -85,6 +88,8 @@ bool ToMetal::Translate()
psShader->ExpandSWAPCs();
psShader->ForcePositionToHighp();
psShader->AnalyzeIOOverlap();
if ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0)
psShader->SetMaxSemanticIndex();
psShader->FindUnusedGlobals(psContext->flags);
psContext->indent = 0;
@ -136,7 +141,7 @@ bool ToMetal::Translate()
ShaderPhase &phase = psShader->asPhases[ui32Phase];
phase.UnvectorizeImmMoves();
psContext->DoDataTypeAnalysis(&phase);
phase.ResolveUAVProperties();
phase.ResolveUAVProperties(psShader->sInfo);
ReserveUAVBindingSlots(&phase); // TODO: unify slot allocation code between gl/metal/vulkan
HLSLcc::DoLoopTransform(psContext, phase);
}
@ -189,9 +194,10 @@ bool ToMetal::Translate()
continue;
psContext->currentPhase = ui32Phase;
#ifdef _DEBUG
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
{
// bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase));
#endif
}
for (i = 0; i < psPhase->psDecl.size(); ++i)
{
TranslateDeclaration(&psPhase->psDecl[i]);
@ -205,9 +211,13 @@ bool ToMetal::Translate()
}
else
{
psContext->indent++;
for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i)
TranslateDeclaration(&psShader->asPhases[0].psDecl[i]);
psContext->indent--;
// Output default implementations for framebuffer index remap if needed
if (m_NeedFBOutputRemapDecl)
bcatcstr(glsl, "#ifndef XLT_REMAP_O\n\t#define XLT_REMAP_O {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_o[] = XLT_REMAP_O;\n");
@ -383,6 +393,13 @@ bool ToMetal::Translate()
else if (psShader->eShaderType == HULL_SHADER)
mem.second.assign("// mtl_InstanceID passed through groupID");
}
else if (mem.first == "mtl_BaseInstance")
{
if (psShader->eShaderType == VERTEX_SHADER)
mem.second.assign("uint mtl_BaseInstance");
else if (psShader->eShaderType == HULL_SHADER)
mem.second.assign("// mtl_BaseInstance ignored");
}
else if (mem.first == "mtl_VertexID")
{
if (psShader->eShaderType == VERTEX_SHADER)
@ -392,6 +409,15 @@ bool ToMetal::Translate()
else if (psShader->eShaderType == DOMAIN_SHADER)
mem.second.assign("// mtl_VertexID unused");
}
else if (mem.first == "mtl_BaseVertex")
{
if (psShader->eShaderType == VERTEX_SHADER)
mem.second.assign("uint mtl_BaseVertex");
else if (psShader->eShaderType == HULL_SHADER)
mem.second.assign("// mtl_BaseVertex generated in compute kernel");
else if (psShader->eShaderType == DOMAIN_SHADER)
mem.second.assign("// mtl_BaseVertex unused");
}
});
}
@ -467,6 +493,23 @@ bool ToMetal::Translate()
bcatcstr(bodyglsl, ",\n");
}
// Figure and declare counters and their binds (we also postponed buffer reflection until now)
for (auto it = m_BufferReflections.begin(); it != m_BufferReflections.end(); ++it)
{
uint32_t bind = it->second.bind;
if (it->second.hasCounter)
{
const uint32_t counterBind = m_BufferSlots.PeekFirstFreeSlot();
m_BufferSlots.ReserveBindingSlot(counterBind, BindingSlotAllocator::UAV);
bformata(bodyglsl, ",\n\t\tdevice atomic_uint* %s_counter [[ buffer(%d) ]]", it->first.c_str(), counterBind);
// Offset by 1 so we can capture counters that are bound to slot 0 (if, say, the user decides to start buffers at register 1 or higher)
bind |= ((counterBind + 1) << 16);
}
psContext->m_Reflection.OnBufferBinding(it->first, bind, it->second.isUAV);
}
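A worked example of the packing applied above (illustrative; the matching decode lives on the runtime side and is not shown in this commit): the counter's slot goes into the upper 16 bits offset by one, so a counter bound to slot 0 is still distinguishable from "no counter".
#include <cstdint>
#include <cstdio>

int main()
{
    const uint32_t bufferBind = 3;  // slot of the buffer itself
    const uint32_t counterBind = 0; // a counter in slot 0 must still be representable

    uint32_t packed = bufferBind;
    packed |= ((counterBind + 1) << 16);                 // same encoding as above

    const bool hasCounter = (packed >> 16) != 0;
    const uint32_t unpackedCounter = (packed >> 16) - 1; // undo the +1 offset
    const uint32_t unpackedBuffer = packed & 0xFFFFu;

    std::printf("%u %d %u\n", unpackedBuffer, hasCounter ? 1 : 0, unpackedCounter); // 3 1 0
    return 0;
}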
bcatcstr(bodyglsl, ")\n{\n");
if (popPragmaDiagnostic)
@ -474,6 +517,33 @@ bool ToMetal::Translate()
if (psShader->eShaderType != COMPUTE_SHADER)
{
if (psShader->eShaderType == VERTEX_SHADER)
{
// Fix HLSL compatibility with DrawProceduralIndirect: SV_InstanceID always starts at 0, but Metal does not subtract the base instance, so we do it here for equal behavior.
// Base vertex/instance semantics are available everywhere starting with iOS 9 (except that a hardware limitation on the original Apple A7/A8 GPUs forces UNITY_SUPPORT_INDIRECT_BUFFERS=0).
std::for_each(m_StructDefinitions[""].m_Members.begin(), m_StructDefinitions[""].m_Members.end(), [&](MemberDefinitions::value_type &mem)
{
if (mem.first == "mtl_InstanceID")
{
bcatcstr(bodyglsl, "#if !UNITY_SUPPORT_INDIRECT_BUFFERS\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "mtl_BaseInstance = 0;\n");
bcatcstr(bodyglsl, "#endif\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "mtl_InstanceID = mtl_InstanceID - mtl_BaseInstance;\n");
}
else if (mem.first == "mtl_VertexID")
{
bcatcstr(bodyglsl, "#if !UNITY_SUPPORT_INDIRECT_BUFFERS\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "mtl_BaseVertex = 0;\n");
bcatcstr(bodyglsl, "#endif\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "mtl_VertexID = mtl_VertexID - mtl_BaseVertex;\n");
}
});
}
if (m_StructDefinitions[GetOutputStructName().c_str()].m_Members.size() > 0)
{
psContext->AddIndentation();
@ -498,7 +568,9 @@ bool ToMetal::Translate()
bcatcstr(bodyglsl, "const bool patchValid = (patchID < patchInfo.numPatches);\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "const uint mtl_InstanceID = groupID.y;\n");
bcatcstr(bodyglsl, "const uint mtl_BaseInstance = 0;\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "const uint mtl_InstanceID = groupID.y - mtl_BaseInstance;\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "const uint internalPatchID = mtl_InstanceID * patchInfo.numPatches + patchID;\n");
psContext->AddIndentation();
@ -507,7 +579,9 @@ bool ToMetal::Translate()
psContext->AddIndentation();
bcatcstr(bodyglsl, "const uint controlPointID = (tID.x % patchInfo.numControlPointsPerPatch);\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "const uint mtl_VertexID = (mtl_InstanceID * (patchInfo.numControlPointsPerPatch * patchInfo.numPatches)) + tID.x;\n");
bcatcstr(bodyglsl, "const uint mtl_BaseVertex = 0;\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "const uint mtl_VertexID = ((mtl_InstanceID * (patchInfo.numControlPointsPerPatch * patchInfo.numPatches)) + tID.x) - mtl_BaseVertex;\n");
psContext->AddIndentation();
bformata(bodyglsl, "threadgroup %s inputGroup[numPatchesInThreadGroup];\n", GetInputStructName().c_str());
@ -563,15 +637,19 @@ bool ToMetal::Translate()
if (psPhase->earlyMain->slen > 1)
{
#ifdef _DEBUG
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
{
psContext->AddIndentation();
bcatcstr(bodyglsl, "//--- Start Early Main ---\n");
#endif
}
bconcat(bodyglsl, psPhase->earlyMain);
#ifdef _DEBUG
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
{
psContext->AddIndentation();
bcatcstr(bodyglsl, "//--- End Early Main ---\n");
#endif
}
}
psContext->AddIndentation();
@ -618,15 +696,19 @@ bool ToMetal::Translate()
if (psPhase->hasPostShaderCode)
{
#ifdef _DEBUG
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
{
psContext->AddIndentation();
bcatcstr(bodyglsl, "//--- Post shader code ---\n");
#endif
}
bconcat(bodyglsl, psPhase->postShaderCode);
#ifdef _DEBUG
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
{
psContext->AddIndentation();
bcatcstr(bodyglsl, "//--- End post shader code ---\n");
#endif
}
}
if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE)
@ -676,15 +758,19 @@ bool ToMetal::Translate()
{
if (psContext->psShader->asPhases[0].earlyMain->slen > 1)
{
#ifdef _DEBUG
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
{
psContext->AddIndentation();
bcatcstr(bodyglsl, "//--- Start Early Main ---\n");
#endif
}
bconcat(bodyglsl, psContext->psShader->asPhases[0].earlyMain);
#ifdef _DEBUG
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
{
psContext->AddIndentation();
bcatcstr(bodyglsl, "//--- End Early Main ---\n");
#endif
}
}
for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i)


@ -2,6 +2,8 @@
#include "internal_includes/debug.h"
#include "internal_includes/HLSLccToolkit.h"
#include "internal_includes/Declaration.h"
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/languages.h"
#include <algorithm>
#include <sstream>
#include <cmath>
@ -19,7 +21,9 @@ using namespace HLSLcc;
bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix, int *iIgnoreRedirect)
{
if (psContext->psShader->eShaderType == HULL_SHADER && sig && sig->semanticName == "SV_TessFactor")
if (sig)
{
if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_TessFactor")
{
if (pui32IgnoreSwizzle)
*pui32IgnoreSwizzle = 1;
@ -32,7 +36,7 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I
return true;
}
if (psContext->psShader->eShaderType == HULL_SHADER && sig && sig->semanticName == "SV_InsideTessFactor")
if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_InsideTessFactor")
{
if (pui32IgnoreSwizzle)
*pui32IgnoreSwizzle = 1;
@ -47,21 +51,19 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I
return true;
}
if (sig && sig->semanticName == "SV_InstanceID")
if (sig->semanticName == "SV_InstanceID")
{
if (pui32IgnoreSwizzle)
*pui32IgnoreSwizzle = 1;
}
if (sig && ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) &&
if (((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) &&
((psContext->psShader->eShaderType == VERTEX_SHADER && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) == 0)))
{
result = "mtl_Position";
return true;
}
if (sig)
{
switch (sig->eSystemValueType)
{
case NAME_POSITION:
@ -120,7 +122,6 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I
default:
break;
}
}
if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE ||
psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_FORK_PHASE)
@ -130,6 +131,7 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I
result = oss.str();
return true;
}
}
switch (psOperand->eType)
{
@ -176,7 +178,7 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I
case OPERAND_TYPE_INPUT:
{
std::ostringstream oss;
ASSERT(sig != NULL);
ASSERT(sig != nullptr);
oss << sig->semanticName << sig->ui32SemanticIndex;
result = oss.str();
if (HLSLcc::WriteMaskToComponentCount(sig->ui32Mask) == 1 && pui32IgnoreSwizzle != NULL)
@ -186,6 +188,7 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I
case OPERAND_TYPE_INPUT_PATCH_CONSTANT:
{
std::ostringstream oss;
ASSERT(sig != nullptr);
oss << sig->semanticName << sig->ui32SemanticIndex;
result = oss.str();
if (outSkipPrefix != NULL) *outSkipPrefix = true;
@ -194,6 +197,7 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I
case OPERAND_TYPE_INPUT_CONTROL_POINT:
{
std::ostringstream oss;
ASSERT(sig != nullptr);
oss << sig->semanticName << sig->ui32SemanticIndex;
result = oss.str();
if (outSkipPrefix != NULL) *outSkipPrefix = true;
@ -242,6 +246,7 @@ void ToMetal::DeclareBuiltinInput(const Declaration *psDecl)
break;
case NAME_INSTANCE_ID:
m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_InstanceID", "uint mtl_InstanceID [[ instance_id ]]"));
m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_BaseInstance", "uint mtl_BaseInstance [[ base_instance ]]")); // Requires Metal runtime 1.1+
break;
case NAME_IS_FRONT_FACE:
m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_FrontFace", "bool mtl_FrontFace [[ front_facing ]]"));
@ -251,6 +256,7 @@ void ToMetal::DeclareBuiltinInput(const Declaration *psDecl)
break;
case NAME_VERTEX_ID:
m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_VertexID", "uint mtl_VertexID [[ vertex_id ]]"));
m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_BaseVertex", "uint mtl_BaseVertex [[ base_vertex ]]")); // Requires Metal runtime 1.1+
break;
case NAME_PRIMITIVE_ID:
// Not on Metal
@ -345,7 +351,6 @@ void ToMetal::DeclareBuiltinOutput(const Declaration *psDecl)
m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Position", "float4 mtl_Position [[ position ]]"));
break;
case NAME_RENDER_TARGET_ARRAY_INDEX:
// Only supported on a Mac
m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Layer", "uint mtl_Layer [[ render_target_array_index ]]"));
break;
case NAME_CLIP_DISTANCE:
@ -404,6 +409,8 @@ void ToMetal::DeclareBuiltinOutput(const Declaration *psDecl)
ASSERT(0); // Wut
break;
}
psContext->m_Reflection.OnBuiltinOutput(psDecl->asOperands[0].eSpecialName);
}
static std::string BuildOperandTypeString(OPERAND_MIN_PRECISION ePrec, INOUT_COMPONENT_TYPE eType, int numComponents)
@ -467,6 +474,9 @@ void ToMetal::DeclareHullShaderPassthrough()
name = oss.str();
}
if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0)
name = "mtl_Position";
uint32_t ui32NumComponents = HLSLcc::GetNumberBitsSet(psSig->ui32Mask);
std::string typeName = BuildOperandTypeString(OPERAND_MIN_PRECISION_DEFAULT, psSig->eComponentType, ui32NumComponents);
@ -488,7 +498,8 @@ void ToMetal::DeclareHullShaderPassthrough()
oss << typeName << " " << name;
// VERTEX_SHADER hardcoded on purpose
uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true);
bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0);
uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true, keepLocation, psContext->psShader->maxSemanticIndex);
oss << " [[ " << "attribute(" << loc << ")" << " ]] ";
psContext->m_Reflection.OnInputBinding(name, loc);
@ -717,9 +728,6 @@ static std::string TranslateResourceDeclaration(HLSLCrossCompilerContext* psCont
if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) != 0)
{
access = "write";
if (psContext->psShader->eShaderType != COMPUTE_SHADER)
psContext->m_Reflection.OnDiagnostics("This shader might not work on all Metal devices because of texture writes on non-compute shaders.", 0, false);
if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ) != 0)
{
access = "read_write";
@ -1099,11 +1107,6 @@ void ToMetal::DeclareBufferVariable(const Declaration *psDecl, bool isRaw, bool
BufConst = "const ";
oss << BufConst;
}
else
{
if (psContext->psShader->eShaderType != COMPUTE_SHADER)
psContext->m_Reflection.OnDiagnostics("This shader might not work on all Metal devices because of buffer writes on non-compute shaders.", 0, false);
}
if (isRaw)
oss << "device uint *" << BufName;
@ -1114,23 +1117,12 @@ void ToMetal::DeclareBufferVariable(const Declaration *psDecl, bool isRaw, bool
oss << " [[ buffer(" << loc << ") ]]";
m_StructDefinitions[""].m_Members.push_back(std::make_pair(BufName, oss.str()));
psContext->m_Reflection.OnBufferBinding(BufName, loc, isUAV);
// We don't do REAL reflection here; we need to collect all the data first and figure out whether we're dealing with counters.
// If so, we need to patch the counter binding info, add counters to empty slots, etc.
const BufferReflection br = { loc, isUAV, psDecl->sUAV.bCounter != 0 };
m_BufferReflections.insert(std::make_pair(BufName, br));
}
// In addition to the actual declaration, we need pointer modification and possible counter declaration
// in early main:
// Possible counter is always in the beginning of the buffer
if (isUAV && psDecl->sUAV.bCounter)
{
bformata(GetEarlyMain(psContext), "device atomic_uint *%s_counter = reinterpret_cast<device atomic_uint *> (%s);\n", BufName.c_str(), BufName.c_str());
}
// Some GPUs don't allow memory access below the buffer binding offset in the shader, so always bind the compute buffer
// at offset 0 instead of GetDataOffset().
// We can't tell at shader compile time whether the buffer actually has a counter or not, so we always reserve
// space for the counter and bump the data pointer to the beginning of the actual data here.
bformata(GetEarlyMain(psContext), "%s = reinterpret_cast<%sdevice %s *> (reinterpret_cast<device %satomic_uint *> (%s) + 1);\n", BufName.c_str(), BufConst.c_str(), (isRaw ? "uint" : BufType.c_str()), BufConst.c_str(), BufName.c_str());
}
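A host-side sketch of the pointer bump emitted into early main above, with plain C++ pointers standing in for the device address space (assumption: a single 32-bit counter slot precedes the data):
#include <cstdint>
#include <cstdio>

// The buffer is bound at offset 0, one 32-bit counter slot is always assumed at the
// start, and the data pointer is advanced past it whether or not a counter is used.
int main()
{
    uint32_t backing[1 + 4] = {}; // [counter][data...], uint-typed for simplicity
    uint32_t* counter = backing;  // counter (if any) lives at the base
    uint32_t* data = backing + 1; // what the shader treats as the buffer contents

    *counter = 2;                 // e.g. an append-buffer element count
    data[0] = 7;
    std::printf("counter=%u first=%u offset=%zu bytes\n",
                *counter, data[0], (size_t)((char*)data - (char*)backing)); // offset is 4 bytes
    return 0;
}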
static int ParseInlineSamplerWrapMode(const std::string& samplerName, const std::string& wrapName)
@ -1185,6 +1177,11 @@ static bool EmitInlineSampler(HLSLCrossCompilerContext* psContext, const std::st
return false;
}
// Starting with macOS 11 / iOS 14, the Metal compiler warns about unused inline samplers, which can
// happen on mobile due to the _mtl_xl_shadow_sampler workaround that's required for pre-GPUFamily3.
if (hasCompare && IsMobileTarget(psContext))
return true;
bstring str = GetEarlyMain(psContext);
bformata(str, "constexpr sampler %s(", name.c_str());
@ -1194,7 +1191,7 @@ static bool EmitInlineSampler(HLSLCrossCompilerContext* psContext, const std::st
if (hasTrilinear)
bformata(str, "filter::linear,mip_filter::linear,");
else if (hasLinear)
bformata(str, "filter::linear,");
bformata(str, "filter::linear,mip_filter::nearest,");
else
bformata(str, "filter::nearest,");
@ -1276,7 +1273,7 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl)
}
//Already declared as part of an array.
if (psShader->aIndexedInput[psOperand->GetRegisterSpace(psContext)][psDecl->asOperands[0].ui32RegisterNumber] == -1)
if (psDecl->eOpcode == OPCODE_DCL_INPUT && psShader->aIndexedInput[psOperand->GetRegisterSpace(psContext)][psDecl->asOperands[0].ui32RegisterNumber] == -1)
{
break;
}
@ -1401,7 +1398,8 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl)
{
std::ostringstream oss;
// VERTEX_SHADER hardcoded on purpose
uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true);
bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0);
uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true, keepLocation, psShader->maxSemanticIndex);
oss << "attribute(" << loc << ")";
semantic = oss.str();
psContext->m_Reflection.OnInputBinding(name, loc);
@ -2391,6 +2389,7 @@ void ToMetal::DeclareOutput(const Declaration *psDecl)
oss << type << " " << name << " [[ color(xlt_remap_o[" << psSignature->ui32SemanticIndex << "]) ]]";
m_NeedFBOutputRemapDecl = true;
m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str()));
psContext->m_Reflection.OnFragmentOutputDeclaration(iNumComponents, psSignature->ui32SemanticIndex);
}
}
break;
@ -2412,6 +2411,9 @@ void ToMetal::DeclareOutput(const Declaration *psDecl)
oss << " [[ user(" << name << ") ]]";
m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, oss.str()));
if (psContext->psShader->eShaderType == VERTEX_SHADER)
psContext->m_Reflection.OnVertexProgramOutput(name, psSignature->semanticName, psSignature->ui32SemanticIndex);
// For preserving data layout, declare output struct as domain shader input, too
if (psContext->psShader->eShaderType == HULL_SHADER)
{
@ -2421,7 +2423,8 @@ void ToMetal::DeclareOutput(const Declaration *psDecl)
oss << type << " " << name;
// VERTEX_SHADER hardcoded on purpose
uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true);
bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0);
uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true, keepLocation, psContext->psShader->maxSemanticIndex);
oss << " [[ " << "attribute(" << loc << ")" << " ]] ";
psContext->m_Reflection.OnInputBinding(name, loc);
@ -2439,7 +2442,8 @@ void ToMetal::DeclareOutput(const Declaration *psDecl)
void ToMetal::EnsureShadowSamplerDeclared()
{
if (m_ShadowSamplerDeclared)
// on macOS we will set the comparison func from the app side
if (m_ShadowSamplerDeclared || !IsMobileTarget(psContext))
return;
if ((psContext->flags & HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR) != 0 || (psContext->psShader->eShaderType == COMPUTE_SHADER))
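
Note: with the early-out above, the constexpr shadow sampler is only emitted for mobile targets; on macOS the comparison function is configured on the app side, so nothing is declared. A compact restatement of the gating (function shape is illustrative, not the real method):

static bool ShouldDeclareShadowSampler(bool alreadyDeclared, bool isMobileTarget)
{
    // macOS: the app sets the compare function on the sampler state, so skip emission
    return !alreadyDeclared && isMobileTarget;
}
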

File diff suppressed because it is too large

View File

@ -573,61 +573,61 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
{
case SVT_FLOAT:
ASSERT(psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] != 0);
if (psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle)
if (psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] == 1)
*pui32IgnoreSwizzle = 1;
break;
case SVT_FLOAT16:
ASSERT(psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] != 0);
oss << ("16_");
if (psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle)
if (psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] == 1)
*pui32IgnoreSwizzle = 1;
break;
case SVT_FLOAT10:
ASSERT(psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] != 0);
oss << ("10_");
if (psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle)
if (psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] == 1)
*pui32IgnoreSwizzle = 1;
break;
case SVT_INT:
ASSERT(psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] != 0);
oss << ("i");
if (psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle)
if (psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] == 1)
*pui32IgnoreSwizzle = 1;
break;
case SVT_INT16:
ASSERT(psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] != 0);
oss << ("i16_");
if (psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle)
if (psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] == 1)
*pui32IgnoreSwizzle = 1;
break;
case SVT_INT12:
ASSERT(psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] != 0);
oss << ("i12_");
if (psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle)
if (psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] == 1)
*pui32IgnoreSwizzle = 1;
break;
case SVT_UINT:
ASSERT(psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] != 0);
oss << ("u");
if (psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle)
if (psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] == 1)
*pui32IgnoreSwizzle = 1;
break;
case SVT_UINT16:
ASSERT(psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] != 0);
oss << ("u16_");
if (psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle)
if (psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] == 1)
*pui32IgnoreSwizzle = 1;
break;
case SVT_DOUBLE:
ASSERT(psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] != 0);
oss << ("d");
if (psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle)
if (psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] == 1)
*pui32IgnoreSwizzle = 1;
break;
case SVT_BOOL:
ASSERT(psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] != 0);
oss << ("b");
if (psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle)
if (psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] == 1)
*pui32IgnoreSwizzle = 1;
break;
default:
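
Note: the switch above encodes the per-type temp-register naming scheme: each precision/type gets its own prefix on the temp array name, and single-component temps suppress the swizzle (the change only drops the redundant null check on pui32IgnoreSwizzle). A compact restatement of the prefix table, with a local enum standing in for HLSLcc's SHADER_VARIABLE_TYPE values:

enum TempType { kFloat, kFloat16, kFloat10, kInt, kInt16, kInt12, kUInt, kUInt16, kDouble, kBool };

static const char* TempPrefix(TempType t)
{
    switch (t)
    {
        case kFloat:   return "";      // plain float temps carry no prefix
        case kFloat16: return "16_";
        case kFloat10: return "10_";
        case kInt:     return "i";
        case kInt16:   return "i16_";
        case kInt12:   return "i12_";
        case kUInt:    return "u";
        case kUInt16:  return "u16_";
        case kDouble:  return "d";
        case kBool:    return "b";
    }
    return "";
}
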
@ -843,7 +843,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
}
}
if (psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle)
if (psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle)
{
switch (rebase)
{
@ -887,7 +887,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
}
}
if (psVarType && psVarType->Class == SVC_SCALAR)
if (psVarType->Class == SVC_SCALAR)
{
*pui32IgnoreSwizzle = 1;