Pulled changes from internal unity repo at changeset 42ec4a54d7ca

This commit is contained in:
Mikko Strandborg 2017-07-31 10:46:48 +03:00
parent 5f8fc43675
commit ab352e4603
17 changed files with 810 additions and 526 deletions

View File

@ -451,6 +451,8 @@ public:
static ResourceGroup ResourceTypeToResourceGroup(ResourceType);
static uint32_t GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors, bool wholeArraySize = false);
static int GetShaderVarFromOffset(const uint32_t ui32Vec4Offset,
const uint32_t (&pui32Swizzle)[4],
const ConstantBuffer* psCBuf,
@ -460,7 +462,7 @@ public:
int32_t* pi32Rebase,
uint32_t flags);
static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector<uint32_t> &indices);
static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector<uint32_t> &indices, const std::string dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors);
// Apply shader precision information to resource bindings
void AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info);

View File

@ -444,8 +444,8 @@ static const unsigned int HLSLCC_FLAG_VULKAN_BINDINGS = 0x40000;
// If set, metal output will use linear sampler for shadow compares, otherwise point sampler.
static const unsigned int HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR = 0x80000;
// If set, emits for NVN, the Nvidia-provided graphics API for Nintendo Switch.
static const unsigned int HLSLCC_FLAG_NVN_TARGET = 0x100000;
// If set, avoid emitting atomic counters (ARB_shader_atomic_counters) and use the atomic functions provided by ARB_shader_storage_buffer_object instead.
static const unsigned int HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS = 0x100000;
// If set, and generating Vulkan shaders, attempts to detect static branching and transforms them into specialization constants
static const unsigned int HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS = 0x200000;
@ -453,6 +453,12 @@ static const unsigned int HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS = 0x200000
// If set, this shader uses the GLSL extension EXT_shader_framebuffer_fetch
static const unsigned int HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH = 0x400000;
// Build for Switch.
static const unsigned int HLSLCC_FLAG_NVN_TARGET = 0x800000;
// If set, generate an instance name for constant buffers. The GLSL 4.5 spec disallows uniform variables from different constant buffers sharing the same name
// when they are part of the same final linked program. Uniform buffer instance names solve this cross-shader symbol conflict issue.
static const unsigned int HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME = 0x1000000;
#ifdef __cplusplus
extern "C" {

View File

@ -315,13 +315,6 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext,
MarkOperandAs(&psInst->asOperands[2], SVT_INT_AMBIGUOUS, aeTempVecType);
break;
case OPCODE_AND:
MarkOperandAs(&psInst->asOperands[0], SVT_INT_AMBIGUOUS, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType);
break;
case OPCODE_IF:
case OPCODE_BREAKC:
case OPCODE_CALLC:
@ -344,12 +337,18 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext,
MarkOperandAs(&psInst->asOperands[2], SVT_UINT, aeTempVecType);
break;
case OPCODE_AND:
case OPCODE_OR:
MarkOperandAs(&psInst->asOperands[0], SVT_INT_AMBIGUOUS, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType);
break;
// Integer ops that don't care about signedness
case OPCODE_IADD:
case OPCODE_INEG:
case OPCODE_ISHL:
case OPCODE_NOT:
case OPCODE_OR:
case OPCODE_XOR:
case OPCODE_BUFINFO:
case OPCODE_COUNTBITS:
@ -673,7 +672,9 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext,
}
}
if (foundImmediate && dataType == SVT_VOID)
// Use at least int type when any operand is immediate.
// Allowing bool could lead to bugs like case 883080
if (foundImmediate && (dataType == SVT_VOID || dataType == SVT_BOOL))
dataType = SVT_INT;
if (dataType != SVT_VOID)

View File

@ -101,7 +101,9 @@ void HLSLCrossCompilerContext::RequireExtension(const std::string &extName)
return;
m_EnabledExtensions.insert(extName);
bformata(extensions, "#ifdef %s\n", extName.c_str());
bformata(extensions, "#extension %s : require\n", extName.c_str());
bcatcstr(extensions, "#endif\n");
}
std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const

View File

@ -457,11 +457,13 @@ namespace HLSLcc
return false;
}
#ifndef fpcheck
#ifdef _MSC_VER
#define fpcheck(x) (_isnan(x) || !_finite(x))
#else
#define fpcheck(x) (std::isnan(x) || std::isinf(x))
#endif
#endif // #ifndef fpcheck
// Helper function to print floats with full precision
void PrintFloat(bstring b, float f)

View File

@ -475,12 +475,11 @@ SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, S
const ShaderVarType* psVarType = NULL;
int32_t rebase = -1;
bool isArray;
int foundVar;
psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, aui32ArraySizes[0], &psCBuf);
if (psCBuf)
{
foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags);
if (foundVar && m_SubOperands[1].get() == NULL) // TODO: why this suboperand thing?
int foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags);
if (foundVar)
{
return psVarType->Type;
}
@ -584,3 +583,66 @@ int Operand::GetNumInputElements(const HLSLCrossCompilerContext *psContext) cons
// TODO: Are there ever any cases where the mask has 'holes'?
return HLSLcc::GetNumberBitsSet(psSig->ui32Mask);
}
// Finds the operand that carries the dynamic (runtime) array index for this operand.
//
// psContext            - translation context; used to query operand data types and to report diagnostics.
// psVar                - the shader variable being accessed; psVar->Parent is measured to detect
//                        structs that fit in a single float4.
// isAoS                - true when the access indexes an array of structs. Only then do we try to
//                        recover the original index variable from before the float4 address calculation.
// needsIndexCalcRevert - output, always written. Set to true when the returned operand still holds the
//                        float4-scaled address, so the caller has to divide it to get the element index.
//
// Returns the first non-null of m_SubOperands[0] / m_SubOperands[1], or (for the array-of-structs case)
// the source operand of the IMUL/ISHL that computed it when that source can be used directly.
// Returns NULL when neither sub-operand is set.
Operand* Operand::GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, const ShaderVarType* psVar, bool isAoS, bool *needsIndexCalcRevert) const
{
    Operand *psDynIndexOp = m_SubOperands[0].get();
    if (psDynIndexOp == NULL)
        psDynIndexOp = m_SubOperands[1].get();

    *needsIndexCalcRevert = false;

    // Nothing more to do unless we are dynamically indexing an array of structs.
    if (psDynIndexOp == NULL || !isAoS)
        return psDynIndexOp;

    // If dynamically indexing an array of structs, try using the original index var before the float4 address calc.
    bool indexVarFound = false;
    *needsIndexCalcRevert = true;

    // The instruction that produced the index value. Guard the lookup: an operand without a recorded
    // defining instruction must end up in the "unsupported" diagnostic below rather than indexing
    // past the end of m_Defines.
    Instruction *psDynIndexOrigin = NULL;
    if (!psDynIndexOp->m_Defines.empty())
        psDynIndexOrigin = psDynIndexOp->m_Defines[0].m_Inst;

    Operand *psOriginOp = NULL;

    // DXBC always addresses as float4, find the address calculation.

    // Special case where struct is float4 size, no extra calc is done.
    if (ShaderInfo::GetCBVarSize(psVar->Parent, true) <= 16) // matrixAsVectors arg does not matter here as with matrices the size will go over the limit anyway
    {
        indexVarFound = true;
        *needsIndexCalcRevert = false;
    }
    else if (psDynIndexOrigin != NULL && psDynIndexOrigin->eOpcode == OPCODE_IMUL)
    {
        // Check which one of the src operands (indices 2 and 3) is the original index;
        // the other one must be the immediate multiplier.
        Operand *asOps = psDynIndexOrigin->asOperands;
        if ((asOps[2].eType == OPERAND_TYPE_TEMP || asOps[2].eType == OPERAND_TYPE_INPUT) && asOps[3].eType == OPERAND_TYPE_IMMEDIATE32)
            psOriginOp = &asOps[2];
        else if ((asOps[3].eType == OPERAND_TYPE_TEMP || asOps[3].eType == OPERAND_TYPE_INPUT) && asOps[2].eType == OPERAND_TYPE_IMMEDIATE32)
            psOriginOp = &asOps[3];
    }
    else if (psDynIndexOrigin != NULL && psDynIndexOrigin->eOpcode == OPCODE_ISHL)
    {
        // Shift by an immediate amount: operand 1 is taken as the original index.
        Operand *asOps = psDynIndexOrigin->asOperands;
        if (asOps[2].eType == OPERAND_TYPE_IMMEDIATE32)
            psOriginOp = &asOps[1];
    }

    if (psOriginOp != NULL)
    {
        indexVarFound = true;

        // Check if the mul dest is not the same temp as the src. Also check that the temp
        // does not have multiple uses (which could overwrite the value)
        // -> we can use src straight and no index revert calc is needed.
        // The m_Defines emptiness check keeps the [0] access in bounds; without a known
        // definition we conservatively keep the scaled index and the revert calculation.
        if ((psOriginOp->eType == OPERAND_TYPE_INPUT)
            || ((psOriginOp->ui32RegisterNumber != psDynIndexOp->ui32RegisterNumber || psOriginOp->GetDataType(psContext) != psDynIndexOp->GetDataType(psContext))
                && !psOriginOp->m_Defines.empty()
                && psOriginOp->m_Defines[0].m_Inst->m_Uses.size() == 1))
        {
            psDynIndexOp = psOriginOp;
            *needsIndexCalcRevert = false;
        }
    }

    // Atm we support only this very basic case of dynamic indexing array of structs.
    // Return error if something else is encountered.
    if (!indexVarFound)
        psContext->m_Reflection.OnDiagnostics("Unsupported dynamic indexing scheme on constant buffer vars.", 0, true);

    return psDynIndexOp;
}

View File

@ -150,29 +150,37 @@ int ShaderInfo::GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType,
return 0;
}
static uint32_t GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors)
uint32_t ShaderInfo::GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors, bool wholeArraySize)
{
// Struct size is calculated from the offset and size of its last member
// Default is regular matrices, vectors and scalars
uint32_t size = psType->Columns * psType->Rows * 4;
// Struct size is calculated from the offset and size of its last member.
// Need to take into account that members could be arrays.
if (psType->Class == SVC_STRUCT)
{
return psType->Members.back().Offset + GetCBVarSize(&psType->Members.back(), matrixAsVectors);
size = psType->Members.back().Offset + GetCBVarSize(&psType->Members.back(), matrixAsVectors, true);
}
// Matrices represented as vec4 arrays have special size calculation
if (matrixAsVectors)
else if (matrixAsVectors)
{
if (psType->Class == SVC_MATRIX_ROWS)
{
return psType->Rows * 16;
size = psType->Rows * 16;
}
else if (psType->Class == SVC_MATRIX_COLUMNS)
{
return psType->Columns * 16;
size = psType->Columns * 16;
}
}
// Regular matrices, vectors and scalars
return psType->Columns * psType->Rows * 4;
if (wholeArraySize && psType->Elements > 1)
{
uint32_t paddedSize = ((size + 15) / 16) * 16; // Arrays are padded to float4 size
size = (psType->Elements - 1) * paddedSize + size; // Except the last element
}
return size;
}
static const ShaderVarType* IsOffsetInType(const ShaderVarType* psType,
@ -184,10 +192,8 @@ static const ShaderVarType* IsOffsetInType(const ShaderVarType* psType,
uint32_t flags)
{
uint32_t thisOffset = parentOffset + psType->Offset;
uint32_t thisSize = GetCBVarSize(psType, (flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0);
uint32_t paddedSize = thisSize;
if (thisSize % 16 > 0)
paddedSize += (16 - (thisSize % 16));
uint32_t thisSize = ShaderInfo::GetCBVarSize(psType, (flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0);
uint32_t paddedSize = ((thisSize + 15) / 16) * 16;
uint32_t arraySize = thisSize;
// Array elements are padded to align on vec4 size, except for the last one
@ -308,7 +314,7 @@ int ShaderInfo::GetShaderVarFromOffset(const uint32_t ui32Vec4Offset,
// Patches the fullName of the var with given array indices. Does not insert the indexing for the var itself if it is an array.
// Searches for brackets and inserts indices one by one.
std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector<uint32_t> &indices)
std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector<uint32_t> &indices, const std::string dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors)
{
std::ostringstream oss;
size_t prevpos = 0;
@ -318,8 +324,29 @@ std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShade
{
pos++;
oss << psShaderVar->fullName.substr(prevpos, pos - prevpos);
if (i < indices.size())
// Add possibly given dynamic index for the root array.
if (i == 0 && !dynamicIndex.empty())
{
oss << dynamicIndex;
// if we couldn't use original index temp, revert the float4 address calc here
if (revertDynamicIndexCalc)
{
const ShaderVarType* psRootVar = psShaderVar;
while (psRootVar->Parent != NULL)
psRootVar = psRootVar->Parent;
uint32_t thisSize = (GetCBVarSize(psRootVar, matrixAsVectors) + 15) / 16; // size in float4
oss << " / " << thisSize;
}
if (!indices.empty() && indices[i] != 0)
oss << " + " << indices[i];
}
else if (i < indices.size())
oss << indices[i];
prevpos = pos;
i++;
pos = psShaderVar->fullName.find('[', prevpos);

View File

@ -92,6 +92,11 @@ public:
// Same as above but with explicit shader type and phase
int GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const;
// Find the operand that contains the dynamic index for this operand (array in constant buffer).
// When isAoS is true, we'll try to find the original index var to avoid additional calculations.
// needsIndexCalcRevert output will tell if we need to divide the value to get the correct index.
Operand* GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, const ShaderVarType* psVar, bool isAoS, bool *needsIndexCalcRevert) const;
// Maps REFLECT_RESOURCE_PRECISION into OPERAND_MIN_PRECISION as much as possible
static OPERAND_MIN_PRECISION ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec);

View File

@ -21,4 +21,6 @@ std::string ResourceName(HLSLCrossCompilerContext* psContext, ResourceGroup grou
std::string TextureSamplerName(ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare);
void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare);
std::string UniformBufferInstanceName(HLSLCrossCompilerContext* psContext, const std::string& name);
#endif

View File

@ -165,6 +165,8 @@ private:
void AddComparison(Instruction* psInst, ComparisonType eType,
uint32_t typeFlag);
bool CanForceToHalfOperand(const Operand *psOperand);
void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc);
void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2);
void CallBinaryOp(const char* name, Instruction* psInst,

View File

@ -643,7 +643,7 @@ bool ToGLSL::Translate()
if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0)
{
DeclareSpecializationConstants(psShader->asPhases[i]);
DeclareSpecializationConstants(*psPhase);
}

View File

@ -12,16 +12,18 @@
#include <float.h>
#include <sstream>
#include <algorithm>
#include <cmath>
#include "internal_includes/toGLSL.h"
using namespace HLSLcc;
#ifndef fpcheck
#ifdef _MSC_VER
#define fpcheck(x) (_isnan(x) || !_finite(x))
#else
#include <cmath>
#define fpcheck(x) ((std::isnan(x)) || (std::isinf(x)))
#define fpcheck(x) (std::isnan(x) || std::isinf(x))
#endif
#endif // #ifndef fpcheck
static void DeclareConstBufferShaderVariable(const HLSLCrossCompilerContext *psContext, const char* Name, const struct ShaderVarType* psType, int unsizedArray, bool addUniformPrefix = false)
//const SHADER_VARIABLE_CLASS eClass, const SHADER_VARIABLE_TYPE eType,
@ -877,14 +879,23 @@ static void DeclareUBOConstants(HLSLCrossCompilerContext* psContext, const uint3
if (psContext->flags & HLSLCC_FLAG_WRAP_UBO)
bformata(glsl, "#ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS\n");
if (psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME)
{
std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name);
bformata(glsl, "} %s;\n", instanceName.c_str());
}
else
bcatcstr(glsl, "};\n");
if (psContext->flags & HLSLCC_FLAG_WRAP_UBO)
bformata(glsl, "#endif\n#undef UNITY_UNIFORM\n");
}
static void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, uint32_t ui32BindingPoint,
const Operand* psOperand, const uint32_t ui32GloballyCoherentAccess,
const uint32_t isRaw, const uint32_t isUAV, const uint32_t stride, bstring glsl)
const uint32_t isRaw, const uint32_t isUAV, const uint32_t hasEmbeddedCounter, const uint32_t stride, bstring glsl)
{
const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0;
bstring BufNamebstr = bfromcstr("");
@ -921,6 +932,9 @@ static void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, uint32_t
bformata(glsl, "buffer %s {\n\t", BufName.c_str());
if (hasEmbeddedCounter)
bformata(glsl, "coherent uint %s_counter;\n\t", BufName.c_str());
if (isRaw)
bcatcstr(glsl, "uint");
else
@ -1201,7 +1215,10 @@ static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const
{
// Need to enable extension (either OES or ARB), but we only need to add it once
if (IsESLanguage(psContext->psShader->eTargetLanguage))
{
psContext->RequireExtension("GL_OES_texture_cube_map_array");
psContext->RequireExtension("GL_EXT_texture_cube_map_array");
}
else
psContext->RequireExtension("GL_ARB_texture_cube_map_array");
}
@ -1997,8 +2014,8 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl)
}
if(numViews > 0 && numViews < 10)
{
bcatcstr(extensions, "#extension GL_OVR_multiview : require\n");
bcatcstr(extensions, "#extension GL_OVR_multiview2 : enable\n");
// multiview2 is required because we have built-in shaders that do eye-dependent work other than just position
bcatcstr(extensions, "#extension GL_OVR_multiview2 : require\n");
if(psShader->eShaderType == VERTEX_SHADER)
bformata(glsl, "layout(num_views = %d) in;\n", numViews);
@ -2349,7 +2366,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl)
};
bformata(tgt, "\tImmCB_%d_%d_%d[%d] = ", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i);
if (fpcheck(val[chunk.second.m_Rebase]))
bformata(tgt, "uintBitsToFloat(uint(%Xu))", *(uint32_t *)&val[chunk.second.m_Rebase]);
bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[chunk.second.m_Rebase]);
else
HLSLcc::PrintFloat(tgt, val[chunk.second.m_Rebase]);
bcatcstr(tgt, ";\n");
@ -2371,7 +2388,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl)
if (k != 0)
bcatcstr(tgt, ", ");
if (fpcheck(val[k]))
bformata(tgt, "uintBitsToFloat(uint(%Xu))", *(uint32_t *)&val[k + chunk.second.m_Rebase]);
bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[k + chunk.second.m_Rebase]);
else
HLSLcc::PrintFloat(tgt, val[k + chunk.second.m_Rebase]);
}
@ -2807,6 +2824,7 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl)
case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED:
{
const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0;
const bool avoidAtomicCounter = (psContext->flags & HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS) != 0;
if(psDecl->sUAV.bCounter)
{
if (isVulkan)
@ -2815,6 +2833,14 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl)
GLSLCrossDependencyData::VulkanResourceBinding uavBinding = psContext->psDependencies->GetVulkanResourceBinding(uavname, true);
GLSLCrossDependencyData::VulkanResourceBinding counterBinding = std::make_pair(uavBinding.first, uavBinding.second+1);
bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; };\n", counterBinding.first, counterBinding.second, uavname.c_str(), uavname.c_str());
DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0],
psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl);
}
else if (avoidAtomicCounter) // no support for atomic counter. We must use atomic functions in SSBO instead.
{
DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0],
psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 1, psDecl->ui32BufferStride, glsl);
}
else
{
@ -2824,12 +2850,18 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl)
bcatcstr(glsl, "highp ");
bcatcstr(glsl, "atomic_uint ");
ResourceName(glsl, psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0);
bformata(glsl, "_counter; \n");
}
}
bcatcstr(glsl, "_counter; \n");
DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0],
psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, psDecl->ui32BufferStride, glsl);
psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl);
}
}
else
{
DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0],
psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl);
}
break;
}
case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW:
@ -2856,20 +2888,20 @@ void ToGLSL::TranslateDeclaration(const Declaration* psDecl)
}
DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0],
psDecl->sUAV.ui32GloballyCoherentAccess, 1, 1, psDecl->ui32BufferStride, glsl);
psDecl->sUAV.ui32GloballyCoherentAccess, 1, 1, 0, psDecl->ui32BufferStride, glsl);
break;
}
case OPCODE_DCL_RESOURCE_STRUCTURED:
{
DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0],
psDecl->sUAV.ui32GloballyCoherentAccess, 0, 0, psDecl->ui32BufferStride, glsl);
psDecl->sUAV.ui32GloballyCoherentAccess, 0, 0, 0, psDecl->ui32BufferStride, glsl);
break;
}
case OPCODE_DCL_RESOURCE_RAW:
{
DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0],
psDecl->sUAV.ui32GloballyCoherentAccess, 1, 0, psDecl->ui32BufferStride, glsl);
psDecl->sUAV.ui32GloballyCoherentAccess, 1, 0, 0, psDecl->ui32BufferStride, glsl);
break;
}
case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED:

View File

@ -1925,6 +1925,7 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals
bstring glsl = *psContext->currentGLSLString;
int numParenthesis = 0;
const bool isVulkan = ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0);
const bool avoidAtomicCounter = ((psContext->flags & HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS) != 0);
if (!isEmbedded)
{
@ -2130,10 +2131,10 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals
}
else
{
// Do component-wise and, glsl doesn't support && on bvecs
// Do component-wise and, glsl doesn't support || on bvecs
for (uint32_t k = 0; k < 4; k++)
{
if ((destMask && (1 << k)) == 0)
if ((destMask & (1 << k)) == 0)
continue;
int needsParenthesis = 0;
@ -2341,8 +2342,20 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals
bcatcstr(glsl, "//UDIV\n");
#endif
//destQuotient, destRemainder, src0, src1
// There are cases where destQuotient is the same variable as src0 or src1. If that happens,
// we need to compute "%" before the "/" in order to avoid src0 or src1 being overwritten first.
if ((psInst->asOperands[0].eType != psInst->asOperands[2].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[2].ui32RegisterNumber)
&& (psInst->asOperands[0].eType != psInst->asOperands[3].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[3].ui32RegisterNumber))
{
CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT);
CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT);
}
else
{
CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT);
CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT);
}
break;
}
case OPCODE_DIV:
@ -3673,6 +3686,8 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals
break;
default:
ASSERT(0);
// Suppress uninitialised variable warning
srcDataType = SVT_VOID;
break;
}
@ -4010,13 +4025,13 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals
#endif
psContext->AddIndentation();
AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis);
if (isVulkan)
if (isVulkan || avoidAtomicCounter)
bcatcstr(glsl, "atomicAdd(");
else
bcatcstr(glsl, "atomicCounterIncrement(");
ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0);
bformata(glsl, "_counter");
if (isVulkan)
if (isVulkan || avoidAtomicCounter)
bcatcstr(glsl, ", 1u)");
else
bcatcstr(glsl, ")");
@ -4031,13 +4046,13 @@ void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = fals
#endif
psContext->AddIndentation();
AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis);
if (isVulkan)
if (isVulkan || avoidAtomicCounter)
bcatcstr(glsl, "(atomicAdd(");
else
bcatcstr(glsl, "atomicCounterDecrement(");
ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0);
bformata(glsl, "_counter");
if (isVulkan)
if (isVulkan || avoidAtomicCounter)
bcatcstr(glsl, ", 0xffffffffu) + 0xffffffffu)");
else
bcatcstr(glsl, ")");

View File

@ -16,11 +16,13 @@
using namespace HLSLcc;
#ifndef fpcheck
#ifdef _MSC_VER
#define fpcheck(x) (_isnan(x) || !_finite(x))
#else
#define fpcheck(x) (std::isnan(x) || std::isinf(x))
#endif
#endif // #ifndef fpcheck
// Returns nonzero if types are just different precisions of the same underlying type
@ -434,8 +436,19 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
if (psOperand->eType == OPERAND_TYPE_INPUT)
{
// Check for scalar
if (psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()
&& psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
// You would think checking would be easy but there is a caveat:
// checking abScalarInput might report as scalar, while in reality that was redirected and now is vector so swizzle must be preserved
// as an example consider we have input:
// float2 x; float y;
// and later on we do
// tex2D(xxx, fixed2(x.x, y));
// in that case we will generate redirect but which ui32RegisterNumber will be used for it is not strictly "specified"
// so we may end up with treating it as scalar (even though it is vector now)
const int redirectInput = psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber];
const bool wasRedirected = redirectInput == 0xFF || redirectInput == 0xFE;
const int scalarInput = psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber];
if (!wasRedirected && (scalarInput & psOperand->GetAccessMask()) && (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE))
{
scalarWithSwizzle = 1;
*pui32IgnoreSwizzle = 1;
@ -884,8 +897,8 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
//Work out the variable name. Don't apply swizzle to that variable yet.
int32_t rebase = 0;
if(psCBuf)
{
ASSERT(psCBuf != NULL);
uint32_t componentsNeeded = 1;
uint32_t minSwiz = 3;
uint32_t maxSwiz = 0;
@ -917,10 +930,40 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
std::copy(&psOperand->aui32Swizzle[firstBitSet], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]);
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags);
// Get a possible dynamic array index
bstring dynamicIndex = bfromcstr("");
bool needsIndexCalcRevert = false;
bool isAoS = ((!isArray && arrayIndices.size() > 0) || (isArray && arrayIndices.size() > 1));
Operand *psDynIndexOp = psOperand->GetDynamicIndexOperand(psContext, psVarType, isAoS, &needsIndexCalcRevert);
if (psDynIndexOp != NULL)
{
SHADER_VARIABLE_TYPE eType = psDynIndexOp->GetDataType(psContext);
uint32_t opFlags = TO_FLAG_INTEGER;
if (eType != SVT_INT && eType != SVT_UINT)
opFlags = TO_AUTO_BITCAST_TO_INT;
TranslateOperand(dynamicIndex, psDynIndexOp, opFlags);
}
char *tmp = bstr2cstr(dynamicIndex, '\0');
std::string dynamicIndexStr = tmp;
bcstrfree(tmp);
bdestroy(dynamicIndex);
if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || ((componentsNeeded+minSwiz) <= psVarType->Columns))
{
// Simple case: just access one component
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices);
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES);
if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) && psCBuf)
{
std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name);
bformata(glsl, "%s.", instanceName.c_str());
}
if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS)))
{
@ -941,6 +984,14 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
else
{
// Non-simple case: build vec4 and apply mask
std::string instanceNamePrefix;
if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) && psCBuf)
{
std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name);
instanceNamePrefix = instanceName + ".";
}
uint32_t i;
std::vector<uint32_t> tmpArrayIndices;
bool tmpIsArray;
@ -963,11 +1014,11 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]);
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags);
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices);
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES);
if (tmpVarType->Class == SVC_SCALAR)
{
bformata(glsl, "%s", fullName.c_str());
bformata(glsl, "%s%s", instanceNamePrefix.c_str(), fullName.c_str());
}
else
{
@ -975,7 +1026,7 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3
swizzle = psOperand->aui32Swizzle[i] - tmpRebase;
bformata(glsl, "%s", fullName.c_str());
bformata(glsl, "%s%s", instanceNamePrefix.c_str(), fullName.c_str());
bformata(glsl, ".%c", "xyzw"[swizzle]);
}
}
@ -985,94 +1036,36 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
// Also swizzle.
*pui32IgnoreSwizzle = 1;
}
}
else // We don't have a semantic for this variable, so try the raw dump appoach.
{
ASSERT(0);
//bformata(glsl, "cb%d.data", psOperand->aui32ArraySizes[0]);//
//index = psOperand->aui32ArraySizes[1];
}
if (isArray)
{
index = arrayIndices.back();
//Dx9 only?
if(psOperand->m_SubOperands[0].get() != NULL)
// Dynamic index is atm supported only at the root array level. Add here only if there is no such parent.
bool hasDynamicIndex = !dynamicIndexStr.empty() && (arrayIndices.size() <= 1);
bool hasImmediateIndex = (index != -1) && !(hasDynamicIndex && index == 0);
if (hasDynamicIndex || hasImmediateIndex)
{
// Array of matrices is treated as array of vec4s in HLSL,
// but that would mess up uniform types in GLSL. Do gymnastics.
uint32_t opFlags = TO_FLAG_INTEGER;
std::ostringstream fullIndexOss;
if (hasDynamicIndex && hasImmediateIndex)
fullIndexOss << "(" << dynamicIndexStr << " + " << index << ")";
else if (hasDynamicIndex)
fullIndexOss << dynamicIndexStr;
else // hasImmediateIndex
fullIndexOss << index;
if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0))
{
// Special handling for matrix arrays
bcatcstr(glsl, "[(");
TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags);
bformata(glsl, ") / 4]");
// Special handling for old matrix arrays
bformata(glsl, "[%s / 4]", fullIndexOss.str().c_str());
bformata(glsl, "[%s %% 4]", fullIndexOss.str().c_str());
}
else // This path is atm the default
{
bcatcstr(glsl, "[((");
TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags, OPERAND_4_COMPONENT_MASK_X);
bformata(glsl, ") %% 4)]");
bformata(glsl, "[%s]", fullIndexOss.str().c_str());
}
}
else
{
bcatcstr(glsl, "[");
TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags);
bformata(glsl, "]");
}
}
else
if(index != -1 && psOperand->m_SubOperands[1].get() != NULL)
{
// Array of matrices is treated as array of vec4s in HLSL,
// but that would mess up uniform types in GLSL. Do gymnastics.
SHADER_VARIABLE_TYPE eType = psOperand->m_SubOperands[1].get()->GetDataType(psContext);
uint32_t opFlags = TO_FLAG_INTEGER;
if (eType != SVT_INT && eType != SVT_UINT)
opFlags = TO_AUTO_BITCAST_TO_INT;
if (((psVarType->Class == SVC_MATRIX_COLUMNS) ||( psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0))
{
// Special handling for matrix arrays
bcatcstr(glsl, "[(");
TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags);
bformata(glsl, " + %d) / 4]", index);
{
bcatcstr(glsl, "[((");
TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags);
bformata(glsl, " + %d) %% 4)]", index);
}
}
else
{
bcatcstr(glsl, "[");
TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags);
if (index != 0)
bformata(glsl, " + %d]", index);
else
bcatcstr(glsl, "]");
}
}
else if(index != -1)
{
if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0))
{
// Special handling for matrix arrays, open them up into vec4's
size_t matidx = index / 4;
size_t rowidx = index - (matidx*4);
bformata(glsl, "[%d][%d]", matidx, rowidx);
}
else
{
bformata(glsl, "[%d]", index);
}
}
else if(psOperand->m_SubOperands[1].get() != NULL)
{
bcatcstr(glsl, "[");
TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER);
bcatcstr(glsl, "]");
}
if(psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle)
@ -1642,3 +1635,41 @@ void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint3
std::string texturesamplername = TextureSamplerName(psShaderInfo, ui32TextureRegisterNumber, ui32SamplerRegisterNumber, bZCompare);
bcatcstr(str, texturesamplername.c_str());
}
// Build the instance name for a uniform buffer from the buffer's name.
//   psContext - translation context; its shader type is read only for "$Globals".
//   name      - uniform buffer name.
// Returns "_" + name for ordinary buffers. For "$Globals" the leading '$' is
// replaced by a per-stage letter, giving e.g. "_VGlobals" for a vertex shader.
std::string UniformBufferInstanceName(HLSLCrossCompilerContext* psContext, const std::string& name)
{
    // Ordinary buffers only need the underscore prefix.
    if (name != "$Globals")
        return std::string("_") + name;

    // The Globals struct name is tweaked per stage to prevent clashes between shader stages.
    // 'A' remains in place when the shader type is not one of the known stages.
    char stageLetter = 'A';
    switch (psContext->psShader->eShaderType)
    {
        case VERTEX_SHADER:
            stageLetter = 'V';
            break;
        case PIXEL_SHADER:
            stageLetter = 'P';
            break;
        case GEOMETRY_SHADER:
            stageLetter = 'G';
            break;
        case HULL_SHADER:
            stageLetter = 'H';
            break;
        case DOMAIN_SHADER:
            stageLetter = 'D';
            break;
        case COMPUTE_SHADER:
            stageLetter = 'C';
            break;
        default:
            ASSERT(0);
            break;
    }

    // Drop the leading '$' and splice the stage letter in its place.
    std::string instanceName("_");
    instanceName += stageLetter;
    instanceName += name.substr(1);
    return instanceName;
}

View File

@ -5,13 +5,15 @@
#include "internal_includes/Declaration.h"
#include <algorithm>
#include <sstream>
#include <cmath>
#ifndef fpcheck
#ifdef _MSC_VER
#define fpcheck(x) (_isnan(x) || !_finite(x))
#else
#include <cmath>
#define fpcheck(x) ((std::isnan(x)) || (std::isinf(x)))
#define fpcheck(x) (std::isnan(x) || std::isinf(x))
#endif
#endif // #ifndef fpcheck
bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix)
@ -34,6 +36,8 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::I
case NAME_RENDER_TARGET_ARRAY_INDEX:
result = "mtl_Layer";
if (outSkipPrefix != NULL) *outSkipPrefix = true;
if (pui32IgnoreSwizzle)
*pui32IgnoreSwizzle = 1;
return true;
case NAME_CLIP_DISTANCE:
{
@ -145,13 +149,8 @@ void ToMetal::DeclareBuiltinInput(const Declaration *psDecl)
m_StructDefinitions[""].m_Members.push_back("float4 mtl_FragCoord [[ position ]]");
break;
case NAME_RENDER_TARGET_ARRAY_INDEX:
#if 0
// Only supported on a Mac
m_StructDefinitions[""].m_Members.push_back("uint mtl_Layer [[ render_target_array_index ]]");
#else
// Not on Metal
ASSERT(0);
#endif
break;
case NAME_CLIP_DISTANCE:
ASSERT(0); // Should never be an input
@ -267,13 +266,8 @@ void ToMetal::DeclareBuiltinOutput(const Declaration *psDecl)
m_StructDefinitions[out].m_Members.push_back("float4 mtl_Position [[ position ]]");
break;
case NAME_RENDER_TARGET_ARRAY_INDEX:
#if 0
// Only supported on a Mac
m_StructDefinitions[out].m_Members.push_back("uint mtl_Layer [[ render_target_array_index ]]");
#else
// Not on Metal
ASSERT(0);
#endif
break;
case NAME_CLIP_DISTANCE:
// it will be done separately in DeclareClipPlanes
@ -663,8 +657,15 @@ static std::string TranslateResourceDeclaration(HLSLCrossCompilerContext* psCont
}
}
}
if (eDimension == RESOURCE_DIMENSION_BUFFER)
switch (eDimension)
{
case RESOURCE_DIMENSION_BUFFER:
case RESOURCE_DIMENSION_TEXTURE2DMS:
case RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
access = "read";
default:
break;
}
}
SHADER_VARIABLE_TYPE svtType = HLSLcc::ResourceReturnTypeToSVTType(eType, ePrec);
@ -768,19 +769,19 @@ static std::string GetInterpolationString(INTERPOLATION_MODE eMode)
return "";
case INTERPOLATION_LINEAR_CENTROID:
return " [[ centroid ]]";
return " [[ centroid_perspective ]]";
case INTERPOLATION_LINEAR_NOPERSPECTIVE:
return " [[ center_perspective ]]";
return " [[ center_no_perspective ]]";
case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID:
return " [[ centroid_noperspective ]]";
return " [[ centroid_no_perspective ]]";
case INTERPOLATION_LINEAR_SAMPLE:
return " [[ sample_perspective ]]";
case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE:
return " [[ sample_noperspective ]]";
return " [[ sample_no_perspective ]]";
default:
ASSERT(0);
return "";
@ -801,9 +802,17 @@ void ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderV
if (var.Class == SVC_STRUCT)
{
std::ostringstream oss;
if (m_StructDefinitions.find(var.name + "_Type") == m_StructDefinitions.end())
DeclareStructType(var.name + "_Type", var.Members, withinCB, cumulativeOffset + var.Offset);
// Report Array-of-Struct CB top-level struct var after all members are reported.
if (var.Parent == NULL && var.Elements > 1 && withinCB)
{
// var.Type being SVT_VOID indicates it is a struct in this case.
psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, false, var.Elements);
}
std::ostringstream oss;
oss << var.name << "_Type " << var.name;
if (var.Elements > 1)
{
@ -1197,6 +1206,13 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl)
m_StructDefinitions[""].m_Members.push_back(oss.str());
break;
}
if (psOperand->eSpecialName == NAME_RENDER_TARGET_ARRAY_INDEX)
{
std::ostringstream oss;
oss << "uint " << name << " [[ render_target_array_index ]]";
m_StructDefinitions[""].m_Members.push_back(oss.str());
break;
}
if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED)
{
std::ostringstream oss;
@ -1484,7 +1500,7 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl)
*(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d
};
if (fpcheck(val[chunk.second.m_Rebase]))
bformata(glsl, "\tas_type<float>(%Xu)", *(uint32_t *)&val[chunk.second.m_Rebase]);
bformata(glsl, "\tas_type<float>(0x%Xu)", *(uint32_t *)&val[chunk.second.m_Rebase]);
else
{
bcatcstr(glsl, "\t");
@ -1511,7 +1527,7 @@ void ToMetal::TranslateDeclaration(const Declaration* psDecl)
if (k != 0)
bcatcstr(glsl, ", ");
// Test the same (rebased) component that is printed below, so a NaN/Inf
// is detected on the value actually emitted rather than on val[k].
if (fpcheck(val[k + chunk.second.m_Rebase]))
bformata(glsl, "as_type<float>(%Xu)", *(uint32_t *)&val[k + chunk.second.m_Rebase]);
bformata(glsl, "as_type<float>(0x%Xu)", *(uint32_t *)&val[k + chunk.second.m_Rebase]);
else
HLSLcc::PrintFloat(glsl, val[k + chunk.second.m_Rebase]);
}

View File

@ -6,6 +6,7 @@
#include "stdio.h"
#include <stdlib.h>
#include <algorithm>
#include <cmath>
#include "internal_includes/debug.h"
#include "internal_includes/Shader.h"
#include "internal_includes/Instruction.h"
@ -175,8 +176,8 @@ void ToMetal::AddComparison(Instruction* psInst, ComparisonType eType,
int needsParenthesis = 0;
if (typeFlag == TO_FLAG_NONE
&& psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16
&& psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16)
&& CanForceToHalfOperand(&psInst->asOperands[1])
&& CanForceToHalfOperand(&psInst->asOperands[2]))
typeFlag = TO_FLAG_FORCE_HALF;
ASSERT(s0ElemCount == s1ElemCount || s1ElemCount == 1 || s0ElemCount == 1);
if ((s0ElemCount != s1ElemCount) && (destElemCount > 1))
@ -251,6 +252,25 @@ void ToMetal::AddComparison(Instruction* psInst, ComparisonType eType,
}
}
// Decides whether an operand may be emitted with forced half precision.
//   psOperand - operand to inspect.
// Returns true when the operand already has type SVT_FLOAT16, or when it is an
// immediate (or immediate constant buffer) operand whose components are each
// either zero or within the normal half-precision magnitude range.
// Returns false for every other operand.
bool ToMetal::CanForceToHalfOperand(const Operand *psOperand)
{
    if (psOperand->GetDataType(psContext) == SVT_FLOAT16)
        return true;

    if (psOperand->eType != OPERAND_TYPE_IMMEDIATE32 && psOperand->eType != OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER)
        return false;

    // Exact binary16 limits: largest finite value and smallest positive normal
    // value (2^-14). Both are exactly representable as float, so a boundary value
    // compares correctly instead of being rejected by a rounded decimal literal.
    const float kHalfMax = 65504.0f;
    const float kHalfMinNormal = 6.103515625e-5f;

    for (int i = 0; i < psOperand->iNumComponents; i++)
    {
        const float magnitude = std::fabs(psOperand->afImmediates[i]);
        // Do not allow forcing an immediate to half if it lies beyond the half min/max boundaries.
        // Zero is always acceptable.
        if (magnitude != 0 && (magnitude > kHalfMax || magnitude < kHalfMinNormal))
            return false;
    }
    return true;
}
void ToMetal::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc)
{
@ -392,10 +412,13 @@ void ToMetal::CallBinaryOp(const char* name, Instruction* psInst,
int needsParenthesis = 0;
if (eDataType == SVT_FLOAT
&& psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16
&& psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16
&& psInst->asOperands[src1].GetDataType(psContext) == SVT_FLOAT16)
&& CanForceToHalfOperand(&psInst->asOperands[dest])
&& CanForceToHalfOperand(&psInst->asOperands[src0])
&& CanForceToHalfOperand(&psInst->asOperands[src1]))
{
ui32Flags = TO_FLAG_FORCE_HALF;
eDataType = SVT_FLOAT16;
}
uint32_t maxElems = std::max(src1SwizCount, src0SwizCount);
if (src1SwizCount != src0SwizCount)
@ -436,11 +459,11 @@ void ToMetal::CallTernaryOp(const char* op1, const char* op2, Instruction* psIns
int numParenthesis = 0;
if (dataType == TO_FLAG_NONE
&& psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16
&& psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16
&& psInst->asOperands[src1].GetDataType(psContext) == SVT_FLOAT16
&& psInst->asOperands[src2].GetDataType(psContext) == SVT_FLOAT16)
ui32Flags = TO_FLAG_FORCE_HALF;
&& CanForceToHalfOperand(&psInst->asOperands[dest])
&& CanForceToHalfOperand(&psInst->asOperands[src0])
&& CanForceToHalfOperand(&psInst->asOperands[src1])
&& CanForceToHalfOperand(&psInst->asOperands[src2]))
ui32Flags = dataType = TO_FLAG_FORCE_HALF;
if (src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount)
{
@ -472,10 +495,10 @@ void ToMetal::CallHelper3(const char* name, Instruction* psInst,
uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements();
int numParenthesis = 0;
if (psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16
&& psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16
&& psInst->asOperands[src1].GetDataType(psContext) == SVT_FLOAT16
&& psInst->asOperands[src2].GetDataType(psContext) == SVT_FLOAT16)
if (CanForceToHalfOperand(&psInst->asOperands[dest])
&& CanForceToHalfOperand(&psInst->asOperands[src0])
&& CanForceToHalfOperand(&psInst->asOperands[src1])
&& CanForceToHalfOperand(&psInst->asOperands[src2]))
ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT;
if ((src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) && paramsShouldFollowWriteMask)
@ -511,9 +534,9 @@ void ToMetal::CallHelper2(const char* name, Instruction* psInst,
int isDotProduct = (strncmp(name, "dot", 3) == 0) ? 1 : 0;
int numParenthesis = 0;
if (psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16
&& psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16
&& psInst->asOperands[src1].GetDataType(psContext) == SVT_FLOAT16)
if (CanForceToHalfOperand(&psInst->asOperands[dest])
&& CanForceToHalfOperand(&psInst->asOperands[src0])
&& CanForceToHalfOperand(&psInst->asOperands[src1]))
ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT;
@ -604,8 +627,8 @@ void ToMetal::CallHelper1(const char* name, Instruction* psInst,
int numParenthesis = 0;
psContext->AddIndentation();
if (psInst->asOperands[dest].GetDataType(psContext) == SVT_FLOAT16
&& psInst->asOperands[src0].GetDataType(psContext) == SVT_FLOAT16)
if (CanForceToHalfOperand(&psInst->asOperands[dest])
&& CanForceToHalfOperand(&psInst->asOperands[src0]))
ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT;
AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, &numParenthesis);
@ -2238,8 +2261,20 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
bcatcstr(glsl, "//UDIV\n");
#endif
//destQuotient, destRemainder, src0, src1
// There are cases where destQuotient is the same variable as src0 or src1. If that happens,
// we need to compute "%" before the "/" in order to avoid src0 or src1 being overwritten first.
if ((psInst->asOperands[0].eType != psInst->asOperands[2].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[2].ui32RegisterNumber)
&& (psInst->asOperands[0].eType != psInst->asOperands[3].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[3].ui32RegisterNumber))
{
CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT);
CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT);
}
else
{
CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT);
CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT);
}
break;
}
case OPCODE_DIV:
@ -2299,8 +2334,8 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
psContext->AddIndentation();
SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext);
uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2;
if (psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16
&& psInst->asOperands[2].GetDataType(psContext) == SVT_FLOAT16)
if (CanForceToHalfOperand(&psInst->asOperands[1])
&& CanForceToHalfOperand(&psInst->asOperands[2]))
typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC2;
if (dstType != SVT_FLOAT16)
@ -2325,8 +2360,8 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
psContext->AddIndentation();
SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext);
uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3;
if (psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16
&& psInst->asOperands[2].GetDataType(psContext) == SVT_FLOAT16)
if (CanForceToHalfOperand(&psInst->asOperands[1])
&& CanForceToHalfOperand(&psInst->asOperands[2]))
typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC3;
if (dstType != SVT_FLOAT16)
@ -2795,7 +2830,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
DeclareExtraFunction("BFI", "\
template <typename UVecType> UVecType bitFieldInsert(const UVecType width, const UVecType offset, const UVecType src2, const UVecType src3)\n\
{\n\
UVecType bitmask = (((1 << width)-1) << offset) & 0xffffffff;\n\
UVecType bitmask = (((UVecType(1) << width)-1) << offset) & 0xffffffff;\n\
return ((src2 << offset) & bitmask) | (src3 & ~bitmask);\n\
}; ");
psContext->AddIndentation();
@ -2971,7 +3006,10 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
}
}
psContext->AddIndentation();
if (ui32SyncFlags & SYNC_THREADS_IN_GROUP)
bformata(glsl, "threadgroup_barrier(mem_flags::%s);\n", barrierFlags);
else
bformata(glsl, "simdgroup_barrier(mem_flags::%s);\n", barrierFlags);
break;
}
@ -3236,38 +3274,57 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
break;
}
int srcCount = psSrc->GetNumSwizzleElements();
int numParenthesis = 0;
uint32_t compMask = 0;
#define RRD(n) REFLECT_RESOURCE_DIMENSION_ ## n
switch (psInst->eResDim)
// unlike glsl, texture arrays will have index in separate argument
const bool isArray = psRes->eDimension == RRD(TEXTURE1DARRAY) || psRes->eDimension == RRD(TEXTURE2DARRAY)
|| psRes->eDimension == RRD(TEXTURE2DMSARRAY) || psRes->eDimension == RRD(TEXTURECUBEARRAY);
uint32_t flags = TO_FLAG_UNSIGNED_INTEGER, opMask = OPERAND_4_COMPONENT_MASK_ALL;
switch (psRes->eDimension)
{
case RESOURCE_DIMENSION_TEXTURE3D:
case RESOURCE_DIMENSION_TEXTURE2DARRAY:
case RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
case RESOURCE_DIMENSION_TEXTURECUBEARRAY:
compMask |= (1 << 2);
case RESOURCE_DIMENSION_TEXTURECUBE:
case RESOURCE_DIMENSION_TEXTURE1DARRAY:
case RESOURCE_DIMENSION_TEXTURE2D:
case RESOURCE_DIMENSION_TEXTURE2DMS:
compMask |= (1 << 1);
case RESOURCE_DIMENSION_TEXTURE1D:
compMask |= 1;
case RRD(TEXTURE3D):
opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z;
flags |= TO_AUTO_EXPAND_TO_VEC3;
break;
case RRD(TEXTURECUBE): case RRD(TEXTURECUBEARRAY):
case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): case RRD(TEXTURE2D): case RRD(TEXTURE2DMS):
opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y;
flags |= TO_AUTO_EXPAND_TO_VEC2;
break;
case RRD(TEXTURE1D): case RRD(TEXTURE1DARRAY):
opMask = OPERAND_4_COMPONENT_MASK_X;
break;
default:
ASSERT(0);
break;
ASSERT(0); break;
}
int srcCount = psSrc->GetNumSwizzleElements(), numParenthesis = 0;
psContext->AddIndentation();
AddAssignToDest(psDest, srcDataType, srcCount, &numParenthesis);
glsl << TranslateOperand(psSrc, TO_FLAG_NAME_ONLY);
bcatcstr(glsl, ".read(");
glsl << TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER, compMask);
glsl << TranslateOperand(psSrcAddr, flags, opMask);
if(isArray)
{
// NB cube array is handled incorrectly - it needs extra "face" arg
switch (psRes->eDimension)
{
case RRD(TEXTURE1DARRAY): opMask = OPERAND_4_COMPONENT_MASK_Y; break;
case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): opMask = OPERAND_4_COMPONENT_MASK_Z; break;
case RRD(TEXTURECUBEARRAY): opMask = OPERAND_4_COMPONENT_MASK_W; break;
default: ASSERT(0); break;
}
bcatcstr(glsl, ", ");
glsl << TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER, opMask);
}
bcatcstr(glsl, ")");
glsl << TranslateOperandSwizzle(&psInst->asOperands[0], OPERAND_4_COMPONENT_MASK_ALL, 0);
AddAssignPrologue(numParenthesis);
#undef RRD
break;
}
case OPCODE_STORE_RAW:
@ -3293,8 +3350,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
{
const ResourceBinding* psRes;
int foundResource;
uint32_t flags = TO_FLAG_UNSIGNED_INTEGER;
uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL;
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(glsl, "//STORE_UAV_TYPED\n");
@ -3319,37 +3375,54 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_NAME_ONLY);
bcatcstr(glsl, ".write(");
#define RRD(n) REFLECT_RESOURCE_DIMENSION_ ## n
// unlike glsl, texture arrays will have index in separate argument
const bool isArray = psRes->eDimension == RRD(TEXTURE1DARRAY) || psRes->eDimension == RRD(TEXTURE2DARRAY)
|| psRes->eDimension == RRD(TEXTURE2DMSARRAY) || psRes->eDimension == RRD(TEXTURECUBEARRAY);
uint32_t flags = TO_FLAG_UNSIGNED_INTEGER, opMask = OPERAND_4_COMPONENT_MASK_ALL;
switch (psRes->eDimension)
{
case REFLECT_RESOURCE_DIMENSION_TEXTURE1D:
case RRD(TEXTURE1D): case RRD(TEXTURE1DARRAY):
opMask = OPERAND_4_COMPONENT_MASK_X;
break;
case REFLECT_RESOURCE_DIMENSION_TEXTURE2D:
case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY:
case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS:
case RRD(TEXTURE2D): case RRD(TEXTURE2DMS): case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY):
opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y;
flags |= TO_AUTO_EXPAND_TO_VEC2;
break;
case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY:
case REFLECT_RESOURCE_DIMENSION_TEXTURE3D:
case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE:
case RRD(TEXTURE3D): case RRD(TEXTURECUBE): case RRD(TEXTURECUBEARRAY):
opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z;
flags |= TO_AUTO_EXPAND_TO_VEC3;
break;
case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY:
flags |= TO_AUTO_EXPAND_TO_VEC4;
break;
default:
ASSERT(0);
break;
};
glsl << TranslateOperand(&psInst->asOperands[2], ResourceReturnTypeToFlag(psRes->ui32ReturnType));
bcatcstr(glsl, ", ");
glsl << TranslateOperand(&psInst->asOperands[1], flags, opMask);
if(isArray)
{
// NB cube array is handled incorrectly - it needs extra "face" arg
flags = TO_FLAG_UNSIGNED_INTEGER;
switch (psRes->eDimension)
{
case RRD(TEXTURE1DARRAY): opMask = OPERAND_4_COMPONENT_MASK_Y; break;
case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY):opMask = OPERAND_4_COMPONENT_MASK_Z; break;
case RRD(TEXTURECUBEARRAY): opMask = OPERAND_4_COMPONENT_MASK_Z; break;
default: ASSERT(0); break;
}
bcatcstr(glsl, ", ");
glsl << TranslateOperand(&psInst->asOperands[1], flags, opMask);
}
bformata(glsl, ");\n");
#undef RRD
break;
}
case OPCODE_LD_RAW:
@ -3639,10 +3712,10 @@ template <int N> vec<int, N> bitFieldExtractI(const vec<uint, N> width, const ve
#endif
psContext->AddIndentation();
bool isFP16 = false;
if (psInst->asOperands[0].GetDataType(psContext) == SVT_FLOAT16
&& psInst->asOperands[1].GetDataType(psContext) == SVT_FLOAT16
&& psInst->asOperands[2].GetDataType(psContext) == SVT_FLOAT16
&& psInst->asOperands[2].GetDataType(psContext) == SVT_FLOAT16)
if (CanForceToHalfOperand(&psInst->asOperands[0])
&& CanForceToHalfOperand(&psInst->asOperands[1])
&& CanForceToHalfOperand(&psInst->asOperands[2])
&& CanForceToHalfOperand(&psInst->asOperands[2]))
isFP16 = true;
int parenthesis = 0;
AddAssignToDest(&psInst->asOperands[0], isFP16 ? SVT_FLOAT16 : SVT_FLOAT, 2, &parenthesis);
@ -3767,7 +3840,7 @@ template <int N> vec<int, N> bitFieldExtractI(const vec<uint, N> width, const ve
const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType;
psContext->AddIndentation();
AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, &numParenthesis);
TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY);
bcatcstr(glsl, TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY).c_str());
bcatcstr(glsl, ".get_num_samples()");
AddAssignPrologue(numParenthesis);
break;

View File

@ -14,18 +14,22 @@
using namespace HLSLcc;
#ifdef _MSC_VER
#if _MSC_VER < 1900
#define snprintf _snprintf
#endif
#endif
#ifndef fpcheck
#ifdef _MSC_VER
#define fpcheck(x) (_isnan(x) || !_finite(x))
#else
#define fpcheck(x) (std::isnan(x) || std::isinf(x))
#endif
#endif // #ifndef fpcheck
// Returns nonzero if types are just different precisions of the same underlying type
static bool AreTypesCompatible(SHADER_VARIABLE_TYPE a, uint32_t ui32TOFlag)
static bool AreTypesCompatibleMetal(SHADER_VARIABLE_TYPE a, uint32_t ui32TOFlag)
{
SHADER_VARIABLE_TYPE b = TypeFlagsToSVTType(ui32TOFlag);
@ -352,8 +356,19 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
if (psOperand->eType == OPERAND_TYPE_INPUT)
{
// Check for scalar
if (psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()
&& psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
// You would think checking would be easy but there is a caveat:
// checking abScalarInput might report as scalar, while in reality that was redirected and now is vector so swizzle must be preserved
// as an example consider we have input:
// float2 x; float y;
// and later on we do
// tex2D(xxx, fixed2(x.x, y));
// in that case we will generate redirect but which ui32RegisterNumber will be used for it is not strictly "specified"
// so we may end up with treating it as scalar (even though it is vector now)
const int redirectInput = psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber];
const bool wasRedirected = redirectInput == 0xFF || redirectInput == 0xFE;
const int scalarInput = psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber];
if (!wasRedirected && (scalarInput & psOperand->GetAccessMask()) && (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE))
{
scalarWithSwizzle = 1;
*pui32IgnoreSwizzle = 1;
@ -384,7 +399,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
}
bool bitcast = false;
if (AreTypesCompatible(eType, ui32TOFlag) == 0)
if (AreTypesCompatibleMetal(eType, ui32TOFlag) == 0)
{
if (CanDoDirectCast(eType, requestedType))
{
@ -406,7 +421,15 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
// Add ctor if needed (upscaling). Type conversion is already handled above, so here we must
// use the original type to not make type conflicts in bitcasts
if (((numComponents < requestedComponents)||(scalarWithSwizzle != 0)) && (hasCtor == 0 || bitcast))
bool needsUpscaling = ((numComponents < requestedComponents)||(scalarWithSwizzle != 0)) && (hasCtor == 0 || bitcast);
// Add a constructor if half precision is forced, to avoid a template ambiguity error from the compiler
bool needsForcedCtor = (ui32TOFlag & TO_FLAG_FORCE_HALF) && (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64);
if (needsForcedCtor)
requestedComponents = std::max(requestedComponents, 1);
if (needsUpscaling || needsForcedCtor)
{
oss << GetConstructorForType(psContext, eType, requestedComponents, false) << "(";
@ -660,8 +683,8 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
//Work out the variable name. Don't apply swizzle to that variable yet.
int32_t rebase = 0;
if(psCBuf)
{
ASSERT(psCBuf != NULL);
uint32_t componentsNeeded = 1;
if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE)
{
@ -679,10 +702,31 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
}
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags);
// Get a possible dynamic array index
std::ostringstream dynIndexOss;
bool needsIndexCalcRevert = false;
bool isAoS = ((!isArray && arrayIndices.size() > 0) || (isArray && arrayIndices.size() > 1));
Operand *psDynIndexOp = psOperand->GetDynamicIndexOperand(psContext, psVarType, isAoS, &needsIndexCalcRevert);
if (psDynIndexOp != NULL)
{
SHADER_VARIABLE_TYPE eType = psDynIndexOp->GetDataType(psContext);
uint32_t opFlags = TO_FLAG_INTEGER;
if (eType != SVT_INT && eType != SVT_UINT)
opFlags = TO_AUTO_BITCAST_TO_INT;
dynIndexOss << TranslateOperand(psDynIndexOp, opFlags);
}
std::string dynamicIndexStr = dynIndexOss.str();
if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || (componentsNeeded <= psVarType->Columns))
{
// Simple case: just access one component
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices);
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES);
if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS)))
{
@ -723,7 +767,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]);
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags);
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices);
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES);
if (tmpVarType->Class == SVC_SCALAR)
{
@ -744,76 +788,37 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
// Also swizzle.
*pui32IgnoreSwizzle = 1;
}
}
else // We don't have a semantic for this variable, so try the raw dump appoach.
{
ASSERT(0); // We're screwed.
// bformata(glsl, "cb%d.data", psOperand->aui32ArraySizes[0]);//
// index = psOperand->aui32ArraySizes[1];
}
if (isArray)
{
index = arrayIndices.back();
//Dx9 only?
if (psOperand->m_SubOperands[0].get() != NULL)
{
// Array of matrices is treated as array of vec4s in HLSL,
// but that would mess up uniform types in GLSL. Do gymnastics.
uint32_t opFlags = TO_FLAG_INTEGER;
// Dynamic index is atm supported only at the root array level. Add here only if there is no such parent.
bool hasDynamicIndex = !dynamicIndexStr.empty() && (arrayIndices.size() <= 1);
bool hasImmediateIndex = (index != -1) && !(hasDynamicIndex && index == 0);
if ((psVarType->Class == SVC_MATRIX_COLUMNS || psVarType->Class == SVC_MATRIX_ROWS) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0))
if (hasDynamicIndex || hasImmediateIndex)
{
// Special handling for matrix arrays
oss << "[(" << TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags) << ") / 4]";
oss << "[((" << TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags, OPERAND_4_COMPONENT_MASK_X) << ") % 4)]";
}
else
{
oss << "[" << TranslateOperand(psOperand->m_SubOperands[0].get(), opFlags) << "]";
}
}
else
if (index != -1 && psOperand->m_SubOperands[1].get() != NULL)
{
// Array of matrices is treated as array of vec4s in HLSL,
// but that would mess up uniform types in GLSL. Do gymnastics.
SHADER_VARIABLE_TYPE eType = psOperand->m_SubOperands[1].get()->GetDataType(psContext);
uint32_t opFlags = TO_FLAG_INTEGER;
if (eType != SVT_INT && eType != SVT_UINT)
opFlags = TO_AUTO_BITCAST_TO_INT;
std::ostringstream fullIndexOss;
if (hasDynamicIndex && hasImmediateIndex)
fullIndexOss << "(" << dynamicIndexStr << " + " << index << ")";
else if (hasDynamicIndex)
fullIndexOss << dynamicIndexStr;
else // hasImmediateStr
fullIndexOss << index;
if ((psVarType->Class == SVC_MATRIX_COLUMNS || psVarType->Class == SVC_MATRIX_ROWS) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0))
if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0))
{
// Special handling for matrix arrays
oss << "[(" << TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags) << " + " << index <<") / 4]";
oss << "[((" << TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags, OPERAND_4_COMPONENT_MASK_X) << " + " << index << ") % 4)]";
// Special handling for old matrix arrays
oss << "[" << fullIndexOss.str() << " / 4]";
// Stream insertion does no printf-style unescaping, so the modulo operator
// must be written as a single '%' ("%%" would be emitted verbatim).
oss << "[" << fullIndexOss.str() << " % 4]";
}
else
else // This path is atm the default
{
if (index != 0)
oss << "[" << TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags) << " + " << index << "]";
else
oss << "[" << TranslateOperand(psOperand->m_SubOperands[1].get(), opFlags) << "]";
oss << "[" << fullIndexOss.str() << "]";
}
}
else if (index != -1)
{
if ((psVarType->Class == SVC_MATRIX_COLUMNS || psVarType->Class == SVC_MATRIX_ROWS) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0))
{
// Special handling for matrix arrays, open them up into vec4's
size_t matidx = index / 4;
size_t rowidx = index - (matidx * 4);
oss << "[" << matidx << "][" << rowidx << "]";
}
else
{
oss << "[" << index << "]";
}
}
else if (psOperand->m_SubOperands[1].get() != NULL)
{
oss << "[" << TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER) << "]";
}
if(psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle)
@ -974,6 +979,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
case OPERAND_TYPE_UNORDERED_ACCESS_VIEW:
{
oss << ResourceName(RGROUP_UAV, psOperand->ui32RegisterNumber);
*pui32IgnoreSwizzle = 1;
break;
}
case OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY: