Bring changes from upstream ( 281bc7d8cfdb )

This commit is contained in:
Mikko Strandborg 2018-06-10 10:08:26 +03:00
parent 1d91e686d8
commit 467d0a193a
33 changed files with 3494 additions and 1357 deletions

View File

@ -6,6 +6,7 @@
#include <string>
#include "growing_array.h"
#include <stdint.h>
//Reflection
#define MAX_RESOURCE_BINDINGS 256
@ -109,6 +110,14 @@ enum TESSELLATOR_OUTPUT_PRIMITIVE
TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4
};
// Tessellator domain selector; a value of this type is stored in the eTessDomain field declared later in this header.
// NOTE(review): the explicit numeric values look like they mirror the D3D tessellator-domain encoding - confirm before renumbering.
typedef enum TESSELLATOR_DOMAIN
{
TESSELLATOR_DOMAIN_UNDEFINED = 0,
TESSELLATOR_DOMAIN_ISOLINE = 1,
TESSELLATOR_DOMAIN_TRI = 2,
TESSELLATOR_DOMAIN_QUAD = 3
} TESSELLATOR_DOMAIN;
enum SPECIAL_NAME
{
NAME_UNDEFINED = 0,
@ -232,6 +241,7 @@ struct ResourceBinding
RESOURCE_RETURN_TYPE ui32ReturnType;
uint32_t ui32NumSamples;
REFLECT_RESOURCE_PRECISION ePrecision;
int m_SamplerMode; // (SB_SAMPLER_MODE) For samplers, this is the sampler mode this sampler is declared with
SHADER_VARIABLE_TYPE GetDataType() const
{
@ -462,7 +472,7 @@ public:
int32_t* pi32Rebase,
uint32_t flags);
static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector<uint32_t> &indices, const std::string dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors);
static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, const std::vector<uint32_t>& indices, const std::string& dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors);
// Apply shader precision information to resource bindings
void AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info);
@ -491,5 +501,9 @@ public:
TESSELLATOR_PARTITIONING eTessPartitioning;
TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim;
uint32_t ui32TessInputControlPointCount;
uint32_t ui32TessOutputControlPointCount;
TESSELLATOR_DOMAIN eTessDomain;
bool bEarlyFragmentTests;
};

View File

@ -0,0 +1,15 @@
#pragma once
// In Unity, instancing array sizes should be able to be dynamically patched at runtime by defining the macro.
#include <cstring>
#include <string>
#define UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "UNITY_RUNTIME_INSTANCING_ARRAY_SIZE"
const unsigned int kArraySizeConstantID = 0;
// Returns true when cbName begins with the "UnityInstancing" prefix.
// A null cbName is reported as "not an instancing constant buffer" instead of being dereferenced.
// TODO: share with Runtime/GfxDevice/InstancingUtilities.h
inline bool IsUnityInstancingConstantBufferName(const char* cbName)
{
    static const char kInstancedCbNamePrefix[] = "UnityInstancing";
    if (!cbName)
        return false;
    // sizeof() counts the terminating NUL of the literal, so subtract one to compare the prefix characters only.
    return std::strncmp(cbName, kInstancedCbNamePrefix, sizeof(kInstancedCbNamePrefix) - 1) == 0;
}

View File

@ -4,6 +4,7 @@
#include <string>
#include <vector>
#include <map>
#include <algorithm>
#if defined (_WIN32) && defined(HLSLCC_DYNLIB)
#define HLSLCC_APIENTRY __stdcall
@ -48,6 +49,7 @@ typedef struct GlExtensions {
} GlExtensions;
#include "ShaderInfo.h"
#include "UnityInstancingFlexibleArraySize.h"
typedef std::vector<std::string> TextureSamplerPairs;
@ -123,6 +125,88 @@ typedef enum
// Using a texture or uniform name like this will cause conflicts
#define HLSLCC_TEMP_PREFIX "u_xlat"
// Member list type: each entry is a pair of strings.
typedef std::vector<std::pair<std::string, std::string> > MemberDefinitions;

// Contents of a single struct definition, kept as strings.
struct StructDefinition
{
    // A fresh definition has no members, no dependencies and has not been printed.
    StructDefinition()
        : m_Members()
        , m_Dependencies()
        , m_IsPrinted(false)
    {
    }

    // The struct members, stored as strings.
    MemberDefinitions m_Members;

    // Names of the structs this struct depends on.
    std::vector<std::string> m_Dependencies;

    // Has this struct been printed out yet?
    bool m_IsPrinted;
};
typedef std::map<std::string, StructDefinition> StructDefinitions;
// Map of extra function definitions we need to add before the shader body but after the declarations.
typedef std::map<std::string, std::string> FunctionDefinitions;
// A helper class for allocating binding slots
// (because both UAVs and textures use the same slots in Metal, also constant buffers and other buffers etc)
//
// The allocator owns a pool of MAX_RESOURCE_BINDINGS slots (0 .. MAX_RESOURCE_BINDINGS-1).
// A (register number, bind type) key is assigned a slot on first request and keeps it afterwards.
class BindingSlotAllocator
{
    typedef std::map<uint32_t, uint32_t> SlotMap;
    SlotMap m_Allocations;             // key (register number | bind type << 16) -> assigned slot
    uint32_t m_ShaderStageAllocations; // offset added to the register number when building keys in GetBindingSlot

public:
    // Returned by GetBindingSlot when the pool is exhausted and the key has no slot yet.
    enum : uint32_t { kInvalidSlot = 0xFFFFFFFFu };

    BindingSlotAllocator() : m_Allocations(), m_ShaderStageAllocations(0)
    {
        // Stored in descending order so that back()/pop_back() hands out the lowest free slot first.
        m_FreeSlots.reserve(MAX_RESOURCE_BINDINGS);
        for (int i = MAX_RESOURCE_BINDINGS - 1; i >= 0; i--)
            m_FreeSlots.push_back(static_cast<uint32_t>(i));
    }

    enum BindType
    {
        ConstantBuffer = 0,
        RWBuffer,
        Texture,
        UAV
    };

    // Returns the slot assigned to (regNo, type), allocating the lowest free slot on first use.
    // Returns kInvalidSlot (and records nothing) if a new slot is needed but none are left;
    // previously this case read from an empty vector, which is undefined behaviour.
    uint32_t GetBindingSlot(uint32_t regNo, BindType type)
    {
        // The key is regNumber with the bindtype stored to highest 16 bits
        const uint32_t key = (m_ShaderStageAllocations + regNo) | (uint32_t(type) << 16);
        SlotMap::iterator itr = m_Allocations.find(key);
        if (itr != m_Allocations.end())
            return itr->second;

        if (m_FreeSlots.empty())
            return kInvalidSlot;

        const uint32_t slot = m_FreeSlots.back();
        m_FreeSlots.pop_back();
        m_Allocations.insert(std::make_pair(key, slot));
        return slot;
    }

    // Func for reserving binding slots with the original reg number.
    // Used for fragment shader UAVs (SetRandomWriteTarget etc).
    // Note that the key here is built from regNo alone (m_ShaderStageAllocations is not added).
    void ReserveBindingSlot(uint32_t regNo, BindType type)
    {
        const uint32_t key = regNo | (uint32_t(type) << 16);
        m_Allocations.insert(std::make_pair(key, regNo));

        // Remove regNo from free slots (each slot number appears at most once in the pool)
        std::vector<uint32_t>::iterator freeItr = std::find(m_FreeSlots.begin(), m_FreeSlots.end(), regNo);
        if (freeItr != m_FreeSlots.end())
            m_FreeSlots.erase(freeItr);
    }

    // Stores the current number of recorded allocations as the register offset used by
    // subsequent GetBindingSlot calls, and returns that number.
    uint32_t SaveTotalShaderStageAllocationsCount()
    {
        m_ShaderStageAllocations = static_cast<uint32_t>(m_Allocations.size());
        return m_ShaderStageAllocations;
    }

private:
    std::vector<uint32_t> m_FreeSlots; // free slot numbers, highest first
};
//The shader stages (Vertex, Pixel et al) do not depend on each other
//in HLSL. GLSL is a different story. HLSLCrossCompiler requires
//that hull shaders must be compiled before domain shaders, and
@ -207,6 +291,10 @@ public:
GLSLCrossDependencyData()
: eTessPartitioning(),
eTessOutPrim(),
fMaxTessFactor(64.0),
numPatchesInThreadGroup(0),
hasControlPoint(false),
hasPatchConstant(false),
ui32ProgramStages(0),
m_ExtBlendModes(),
m_NextSpecID(0)
@ -290,6 +378,10 @@ public:
//can be saved when compiling hull and passed to domain compilation.
TESSELLATOR_PARTITIONING eTessPartitioning;
TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim;
float fMaxTessFactor;
int numPatchesInThreadGroup;
bool hasControlPoint;
bool hasPatchConstant;
// Bitfield for the shader stages this program is going to include (see PS_FLAG_*).
// Needed so we can construct proper shader input and output names
@ -313,6 +405,28 @@ public:
pixelInterpolation[regNo] = mode;
}
// Unary predicate over string pairs: matches an element whose first string
// equals the value supplied at construction.
struct CompareFirst
{
    // Taken by const reference so the value is copied exactly once (into m_Val).
    CompareFirst(const std::string &val) : m_Val(val) {}

    bool operator()(const std::pair<std::string, std::string>& elem) const
    {
        return m_Val == elem.first;
    }

private:
    std::string m_Val; // the value to look for in pair.first
};
inline bool IsMemberDeclared(const std::string &name)
{
if (std::find_if(m_SharedFunctionMembers.begin(), m_SharedFunctionMembers.end(), CompareFirst(name)) != m_SharedFunctionMembers.end())
return true;
return false;
}
MemberDefinitions m_SharedFunctionMembers;
BindingSlotAllocator m_SharedTextureSlots, m_SharedSamplerSlots;
BindingSlotAllocator m_SharedBufferSlots;
inline void ClearCrossDependencyData()
{
pixelInterpolation.clear();
@ -321,8 +435,9 @@ public:
varyingLocationsMap[i].clear();
nextAvailableVaryingLocation[i] = 0;
}
m_NextSpecID = 0;
m_NextSpecID = kArraySizeConstantID + 1;
m_SpecConstantMap.clear();
m_SharedFunctionMembers.clear();
}
// Retrieve or allocate a layout slot for Vulkan specialization constant
@ -368,9 +483,11 @@ public:
virtual bool OnConstant(const std::string &name, int bindIndex, SHADER_VARIABLE_TYPE cType, int rows, int cols, bool isMatrix, int arraySize) { return true; }
virtual void OnConstantBufferBinding(const std::string &name, int bindIndex) {}
virtual void OnTextureBinding(const std::string &name, int bindIndex, int samplerIndex, HLSLCC_TEX_DIMENSION dim, bool isUAV) {}
virtual void OnTextureBinding(const std::string &name, int bindIndex, int samplerIndex, bool multisampled, HLSLCC_TEX_DIMENSION dim, bool isUAV) {}
virtual void OnBufferBinding(const std::string &name, int bindIndex, bool isUAV) {}
virtual void OnThreadGroupSize(unsigned int xSize, unsigned int ySize, unsigned int zSize) {}
virtual void OnTessellationInfo(uint32_t tessPartitionMode, uint32_t tessOutputWindingOrder, uint32_t tessMaxFactor, uint32_t tessNumPatchesInThreadGroup) {}
virtual void OnTessellationKernelInfo(uint32_t patchKernelBufferCount) {}
};
@ -460,6 +577,12 @@ static const unsigned int HLSLCC_FLAG_NVN_TARGET = 0x800000;
// as long as they are part of the same final linked program. Uniform buffer instance names solve this cross-shader symbol conflict issue.
static const unsigned int HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME = 0x1000000;
// Massage shader steps into Metal compute kernel from vertex/hull shaders + post-tessellation vertex shader from domain shader
static const unsigned int HLSLCC_FLAG_METAL_TESSELLATION = 0x2000000;
// Disable fastmath
static const unsigned int HLSLCC_FLAG_DISABLE_FASTMATH = 0x4000000;
#ifdef __cplusplus
extern "C" {
#endif

View File

@ -341,7 +341,7 @@ void BasicBlock::RVarUnion(ReachableVariables &a, const ReachableVariables &b)
#define UNITY_EXTERNAL_TOOL 1
#include "Testing.h" // From Runtime/Testing
UNIT_TEST_SUITE(HLSLccTests)
UNIT_TEST_SUITE(HLSLcc)
{
TEST(ControlFlowGraph_Build_Simple_Works)
{

View File

@ -451,15 +451,23 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext,
break;
case OPCODE_RESINFO:
{
if (psInst->eResInfoReturnType != RESINFO_INSTRUCTION_RETURN_UINT)
MarkAllOperandsAs(psInst, SVT_FLOAT, aeTempVecType);
break;
}
// Operand 0 depends on the return type declaration, op 1 is always uint
MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType);
switch (psInst->eResInfoReturnType)
{
default:
case RESINFO_INSTRUCTION_RETURN_FLOAT:
case RESINFO_INSTRUCTION_RETURN_RCPFLOAT:
MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
break;
case RESINFO_INSTRUCTION_RETURN_UINT:
MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
break;
}
case OPCODE_SAMPLE_INFO:
// TODO decode the _uint flag
MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
// Sample_info uses the same RESINFO_RETURN_TYPE for storage. 0 = float, 1 = uint.
MarkOperandAs(&psInst->asOperands[0], psInst->eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT ? SVT_FLOAT : SVT_UINT, aeTempVecType);
break;
case OPCODE_SAMPLE_POS:
@ -469,6 +477,7 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext,
case OPCODE_LD_UAV_TYPED:
// translates to gvec4 loadImage(gimage i, ivec p).
MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p
break;
@ -507,9 +516,13 @@ void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext,
break;
case OPCODE_F32TOF16:
MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType);
break;
case OPCODE_F16TOF32:
// TODO
ASSERT(0);
MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType);
break;

View File

@ -8,6 +8,7 @@
#include "internal_includes/debug.h"
#include "internal_includes/Translator.h"
#include "internal_includes/ControlFlowGraph.h"
#include "include/hlslcc.h"
#include <sstream>
void HLSLCrossCompilerContext::DoDataTypeAnalysis(ShaderPhase *psPhase)
@ -95,15 +96,26 @@ void HLSLCrossCompilerContext::AddIndentation()
}
}
void HLSLCrossCompilerContext::RequireExtension(const std::string &extName)
bool HLSLCrossCompilerContext::RequireExtension(const std::string &extName)
{
if (m_EnabledExtensions.find(extName) != m_EnabledExtensions.end())
return;
return true;
m_EnabledExtensions.insert(extName);
bformata(extensions, "#ifdef %s\n", extName.c_str());
bformata(extensions, "#extension %s : require\n", extName.c_str());
bcatcstr(extensions, "#endif\n");
return false;
}
// Registers extName and, the first time it is seen, appends a guarded
// "#extension <name> : enable" directive to the extensions string.
// Returns true if the extension had already been registered, false if this call added it.
bool HLSLCrossCompilerContext::EnableExtension(const std::string &extName)
{
    // insert() reports through .second whether the name was newly added to the set
    const bool newlyAdded = m_EnabledExtensions.insert(extName).second;
    if (!newlyAdded)
        return true;

    const char *name = extName.c_str();
    bformata(extensions, "#ifdef %s\n", name);
    bformata(extensions, "#extension %s : enable\n", name);
    bcatcstr(extensions, "#endif\n");
    return false;
}
std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const
@ -133,9 +145,11 @@ std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOper
if (psIn && piRebase)
*piRebase = psIn->iRebase;
const std::string patchPrefix = psShader->eTargetLanguage == LANG_METAL ? "patch." : "patch";
std::string res = "";
bool skipPrefix = false;
if (psTranslator->TranslateSystemValue(psOperand, psIn, res, puiIgnoreSwizzle, psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0, true, &skipPrefix))
if (psTranslator->TranslateSystemValue(psOperand, psIn, res, puiIgnoreSwizzle, psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0, true, &skipPrefix, &iIgnoreRedirect))
{
if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0) && !skipPrefix)
return inputPrefix + res;
@ -144,7 +158,7 @@ std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOper
}
ASSERT(psIn != NULL);
oss << inputPrefix << (regSpace == 1 ? "patch" : "") << psIn->semanticName << psIn->ui32SemanticIndex;
oss << inputPrefix << (regSpace == 1 ? patchPrefix : "") << psIn->semanticName << psIn->ui32SemanticIndex;
return oss.str();
}
@ -193,23 +207,25 @@ std::string HLSLCrossCompilerContext::GetDeclaredOutputName(const Operand* psOpe
return oss.str();
}
const std::string patchPrefix = psShader->eTargetLanguage == LANG_METAL ? "patch." : "patch";
std::string res = "";
if (psTranslator->TranslateSystemValue(psOperand, psOut, res, puiIgnoreSwizzle, psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber], false))
if (psTranslator->TranslateSystemValue(psOperand, psOut, res, puiIgnoreSwizzle, psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber], false, NULL, &iIgnoreRedirect))
{
// HACK: I couldn't find a better way to handle it
// clip planes will always have interim variable, as HLSL operates on float4 but we need to size output accordingly with actual planes count
// for some reason TranslateSystemValue return *outSkipPrefix = true for ALL system vars and then we simply ignore it here
const bool isClipPlanes = psOut && psOut->eSystemValueType == NAME_CLIP_DISTANCE;
if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0) && !isClipPlanes)
// clip/cull planes will always have interim variable, as HLSL operates on float4 but we need to size output accordingly with actual planes count
// with tessellation factor buffers, a separate buffer from output is used. for some reason TranslateSystemValue return *outSkipPrefix = true
// for ALL system vars and then we simply ignore it here, so opt to modify iIgnoreRedirect for these special cases
if (psShader->eTargetLanguage == LANG_METAL && regSpace == 0 && (iIgnoreRedirect == 0))
return outputPrefix + res;
else if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0))
return patchPrefix + res;
else
return res;
}
ASSERT(psOut != NULL);
oss << outputPrefix << (regSpace == 1 ? "patch" : "") << psOut->semanticName << psOut->ui32SemanticIndex;
oss << outputPrefix << (regSpace == 1 ? patchPrefix : "") << psOut->semanticName << psOut->ui32SemanticIndex;
return oss.str();
}
@ -275,3 +291,9 @@ bool HLSLCrossCompilerContext::OutputNeedsDeclaring(const Operand* psOperand, co
return false;
}
// True when the HLSLCC_FLAG_VULKAN_BINDINGS bit is set in this context's flags.
bool HLSLCrossCompilerContext::IsVulkan() const
{
    const unsigned int vulkanBit = flags & HLSLCC_FLAG_VULKAN_BINDINGS;
    return vulkanBit != 0;
}

View File

@ -80,8 +80,8 @@ HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader,
if (language == LANG_METAL)
{
// Tessellation or geometry shaders are not supported
if (psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER || psShader->eShaderType == GEOMETRY_SHADER)
// Geometry shader is not supported
if (psShader->eShaderType == GEOMETRY_SHADER)
{
result->sourceCode = "";
return 0;
@ -205,7 +205,7 @@ HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename,
length = ftell(shaderFile);
fseek(shaderFile, 0, SEEK_SET);
shader.resize(length + 1);
shader.reserve(length + 1);
readLength = fread(&shader[0], 1, length, shaderFile);

View File

@ -4,6 +4,8 @@
#include "internal_includes/toGLSLOperand.h"
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/Shader.h"
#include "internal_includes/languages.h"
#include "include/UnityInstancingFlexibleArraySize.h"
#include <sstream>
#include <cmath>
@ -54,8 +56,7 @@ namespace HLSLcc
return SVT_FLOAT;
}
const char * GetConstructorForTypeGLSL(const SHADER_VARIABLE_TYPE eType,
const int components, bool useGLSLPrecision)
const char * GetConstructorForTypeGLSL(const HLSLCrossCompilerContext *context, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision)
{
static const char * const uintTypes[] = { " ", "uint", "uvec2", "uvec3", "uvec4" };
static const char * const uint16Types[] = { " ", "mediump uint", "mediump uvec2", "mediump uvec3", "mediump uvec4" };
@ -68,11 +69,12 @@ namespace HLSLcc
static const char * const boolTypes[] = { " ", "bool", "bvec2", "bvec3", "bvec4" };
ASSERT(components >= 1 && components <= 4);
bool emitLowp = EmitLowp(context);
switch (eType)
{
case SVT_UINT:
return uintTypes[components];
return HaveUnsignedTypes(context->psShader->eTargetLanguage) ? uintTypes[components] : intTypes[components];
case SVT_UINT16:
return useGLSLPrecision ? uint16Types[components] : uintTypes[components];
case SVT_INT:
@ -80,13 +82,13 @@ namespace HLSLcc
case SVT_INT16:
return useGLSLPrecision ? int16Types[components] : intTypes[components];
case SVT_INT12:
return useGLSLPrecision ? int12Types[components] : intTypes[components];
return useGLSLPrecision ? (emitLowp ? int12Types[components] : int16Types[components]) : intTypes[components];
case SVT_FLOAT:
return floatTypes[components];
case SVT_FLOAT16:
return useGLSLPrecision ? float16Types[components] : floatTypes[components];
case SVT_FLOAT10:
return useGLSLPrecision ? float10Types[components] : floatTypes[components];
return useGLSLPrecision ? (emitLowp ? float10Types[components] : float16Types[components]) : floatTypes[components];
case SVT_BOOL:
return boolTypes[components];
default:
@ -137,7 +139,7 @@ namespace HLSLcc
if (psContext->psShader->eTargetLanguage == LANG_METAL)
return GetConstructorForTypeMetal(eType, components);
else
return GetConstructorForTypeGLSL(eType, components, useGLSLPrecision);
return GetConstructorForTypeGLSL(psContext, eType, components, useGLSLPrecision);
}
std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows)
@ -442,7 +444,7 @@ namespace HLSLcc
}
// Returns true if a direct constructor can convert src->dest
bool CanDoDirectCast(SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest)
bool CanDoDirectCast(const HLSLCrossCompilerContext *context, SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest)
{
// uint<->int<->bool conversions possible
if ((src == SVT_INT || src == SVT_UINT || src == SVT_BOOL || src == SVT_INT12 || src == SVT_INT16 || src == SVT_UINT16) &&
@ -454,9 +456,23 @@ namespace HLSLcc
(dest == SVT_FLOAT || dest == SVT_DOUBLE || dest == SVT_FLOAT16 || dest == SVT_FLOAT10))
return true;
if (context->psShader->eTargetLanguage == LANG_METAL)
{
// avoid compiler error: cannot use as_type to cast from 'half' to 'unsigned int', types of different size
if ((src == SVT_FLOAT16 || src == SVT_FLOAT10) && (dest == SVT_UINT))
return true;
}
return false;
}
// Returns true when psCBuf is non-null, contains exactly one variable, that variable is a
// struct (SVC_STRUCT) with Elements == 2, and the buffer name passes
// IsUnityInstancingConstantBufferName.
bool IsUnityFlexibleInstancingBuffer(const ConstantBuffer* psCBuf)
{
    if (psCBuf == NULL || psCBuf->asVars.size() != 1)
        return false;

    const auto &varType = psCBuf->asVars[0].sType;
    if (varType.Class != SVC_STRUCT || varType.Elements != 2)
        return false;

    return IsUnityInstancingConstantBufferName(psCBuf->name.c_str());
}
#ifndef fpcheck
#ifdef _MSC_VER
#define fpcheck(x) (_isnan(x) || !_finite(x))

View File

@ -1,4 +1,5 @@
#include "src/internal_includes/HLSLCrossCompilerContext.h"
#include "src/internal_includes/LoopTransform.h"
#include "src/internal_includes/Shader.h"
#include "src/internal_includes/debug.h"
@ -159,7 +160,7 @@ namespace HLSLcc
}
// Attempt to transform a single loop into a for-statement
static void AttemptLoopTransform(ShaderPhase &phase, LoopInfo &li)
static void AttemptLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase, LoopInfo &li)
{
// In order to transform a loop into a for, the following has to hold:
// - The loop must start with a comparison instruction where one of the src operands is a temp (induction variable), followed by OPCODE_BREAKC.
@ -216,19 +217,22 @@ namespace HLSLcc
// but then fails miserably if the loop variable is used as an index to UAV loads/stores or some other cases ("array access too complex")
// This is also triggered when the driver optimizer sees "simple enough" arithmetics (whatever that is) done on the loop variable before indexing.
// So, disable for-loop transformation altogether whenever we see a UAV load or store inside a loop.
for (auto itr = li.m_StartLoop; itr != li.m_EndLoop; itr++)
if(psContext->psShader->eTargetLanguage >= LANG_400 && psContext->psShader->eTargetLanguage < LANG_GL_LAST && !psContext->IsVulkan())
{
switch (itr->eOpcode)
for (auto itr = li.m_StartLoop; itr != li.m_EndLoop; itr++)
{
case OPCODE_LD_RAW:
case OPCODE_LD_STRUCTURED:
case OPCODE_LD_UAV_TYPED:
case OPCODE_STORE_RAW:
case OPCODE_STORE_STRUCTURED:
case OPCODE_STORE_UAV_TYPED:
return; // Nope, can't do a for, not even a partial one.
default:
break;
switch (itr->eOpcode)
{
case OPCODE_LD_RAW:
case OPCODE_LD_STRUCTURED:
case OPCODE_LD_UAV_TYPED:
case OPCODE_STORE_RAW:
case OPCODE_STORE_STRUCTURED:
case OPCODE_STORE_UAV_TYPED:
return; // Nope, can't do a for, not even a partial one.
default:
break;
}
}
}
@ -265,6 +269,13 @@ namespace HLSLcc
// Initializer must only write to one component
if (initializer && initializer->asOperands[0].GetNumSwizzleElements() != 1)
initializer = 0;
// Initializer data type must be int or uint
if (initializer)
{
SHADER_VARIABLE_TYPE dataType = initializer->asOperands[0].GetDataType(psContext);
if (dataType != SVT_INT && dataType != SVT_UINT)
return;
}
// Check that the initializer is only used within the range so we can move it to for statement
if (initializer)
@ -343,12 +354,12 @@ namespace HLSLcc
}
void DoLoopTransform(ShaderPhase &phase)
void DoLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase)
{
Loops loops;
BuildLoopInfo(phase, loops);
std::for_each(loops.begin(), loops.end(), [&phase](LoopInfo &li)
std::for_each(loops.begin(), loops.end(), [&phase, psContext](LoopInfo &li)
{
// Some sanity checks: start and end points must be initialized, we shouldn't have any switches here, and each loop must have at least one exit point
// Also that there's at least 2 instructions in loop body
@ -357,7 +368,7 @@ namespace HLSLcc
ASSERT(li.m_EndLoop > li.m_StartLoop + 2);
ASSERT(!li.m_IsSwitch);
ASSERT(!li.m_ExitPoints.empty());
AttemptLoopTransform(phase, li);
AttemptLoopTransform(psContext, phase, li);
});
}
};

View File

@ -5,7 +5,6 @@
#include "internal_includes/Shader.h"
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/Instruction.h"
#include <algorithm>
uint32_t Operand::GetAccessMask() const
{
@ -337,8 +336,11 @@ SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, S
if (regSpace == 0)
psContext->psShader->sInfo.GetOutputSignatureFromRegister(ui32Register, GetAccessMask(), psContext->psShader->ui32CurrentVertexOutputStream,
&psOut);
else
psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psOut);
else {
psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psOut, true);
if (!psOut)
return SVT_FLOAT;
}
ASSERT(psOut != NULL);
if (psOut->eMinPrec != MIN_PRECISION_DEFAULT)
@ -403,7 +405,6 @@ SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, S
case NAME_RENDER_TARGET_ARRAY_INDEX:
case NAME_VIEWPORT_ARRAY_INDEX:
case NAME_SAMPLE_INDEX:
return SVT_INT;
case NAME_IS_FRONT_FACE:
@ -411,6 +412,7 @@ SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, S
case NAME_POSITION:
case NAME_CLIP_DISTANCE:
case NAME_CULL_DISTANCE:
return SVT_FLOAT;
default:
@ -528,8 +530,12 @@ SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, S
{
return SVT_INT;
}
case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: // constant array is floats everywhere except on vulkan
{
return psContext->IsVulkan() ? SVT_UINT : SVT_FLOAT;
}
case OPERAND_TYPE_INDEXABLE_TEMP: // Indexable temps are always floats
case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: // So are const arrays currently
default:
{
return SVT_FLOAT;
@ -619,7 +625,9 @@ Operand* Operand::GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, co
}
else if (psDynIndexOrigin->eOpcode == OPCODE_ISHL)
{
if (asOps[2].eType == OPERAND_TYPE_IMMEDIATE32)
if (asOps[2].eType == OPERAND_TYPE_IMMEDIATE32 && asOps[1].eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
psOriginOp = &asOps[0];
else if (asOps[2].eType == OPERAND_TYPE_IMMEDIATE32)
psOriginOp = &asOps[1];
}
@ -632,7 +640,7 @@ Operand* Operand::GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, co
// -> we can use src straight and no index revert calc is needed
if ((psOriginOp->eType == OPERAND_TYPE_INPUT)
|| ((psOriginOp->ui32RegisterNumber != psDynIndexOp->ui32RegisterNumber || psOriginOp->GetDataType(psContext) != psDynIndexOp->GetDataType(psContext))
&& psOriginOp->m_Defines[0].m_Inst->m_Uses.size() == 1))
&& (!psOriginOp->m_Defines.empty()) && psOriginOp->m_Defines[0].m_Inst->m_Uses.size() == 1))
{
psDynIndexOp = psOriginOp;
*needsIndexCalcRevert = false;
@ -646,4 +654,4 @@ Operand* Operand::GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, co
}
return psDynIndexOp;
}
}

View File

@ -89,12 +89,9 @@ int ShaderInfo::GetPatchConstantSignatureFromRegister(const uint32_t ui32Registe
}
}
if (allowNull)
return 0;
// There are situations (especially when using dcl_indexrange) where the compiler happily writes outside the actual masks.
// In those situations just take the last signature that uses that register (it's typically the "highest" one)
for (i = ui32NumVars - 1; i != 0xffffffff; i--)
for( i = ui32NumVars - 1; i-- > 0; )
{
if (ui32Register == psPatchConstantSignatures[i].ui32Register)
{
@ -103,8 +100,7 @@ int ShaderInfo::GetPatchConstantSignatureFromRegister(const uint32_t ui32Registe
}
}
ASSERT(0);
ASSERT(allowNull);
return 0;
}
@ -314,7 +310,7 @@ int ShaderInfo::GetShaderVarFromOffset(const uint32_t ui32Vec4Offset,
// Patches the fullName of the var with given array indices. Does not insert the indexing for the var itself if it is an array.
// Searches for brackets and inserts indices one by one.
std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector<uint32_t> &indices, const std::string dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors)
std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, const std::vector<uint32_t>& indices, const std::string& dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors)
{
std::ostringstream oss;
size_t prevpos = 0;

View File

@ -443,6 +443,8 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token,
case OPCODE_DCL_SAMPLER:
{
psDecl->ui32NumOperands = 1;
psDecl->value.eSamplerMode = DecodeSamplerMode(*pui32Token);
DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);
break;
}
@ -628,9 +630,9 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token,
ui32OperandOffset++;
psDecl->value.interface.ui32InterfaceID = interfaceID;
psDecl->value.interface.ui32NumFuncTables = numClassesImplementingThisInterface;
psDecl->value.interface.ui32ArraySize = arrayLen;
psDecl->value.iface.ui32InterfaceID = interfaceID;
psDecl->value.iface.ui32NumFuncTables = numClassesImplementingThisInterface;
psDecl->value.iface.ui32ArraySize = arrayLen;
psShader->funcPointer[interfaceID].ui32NumBodiesPerTable = psDecl->ui32TableLength;
@ -678,6 +680,7 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token,
}
case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
{
psDecl->value.ui32MaxOutputVertexCount = DecodeOutputControlPointCount(*pui32Token);
break;
}
case OPCODE_HS_DECLS:

View File

@ -6,10 +6,6 @@
#include <vector>
#include <memory>
#ifdef __APPLE__
#include <tr1/memory>
#endif
#include <stdint.h>
struct Instruction;
@ -17,12 +13,7 @@ class Operand;
namespace HLSLcc
{
#ifdef __APPLE__
// Herp derp Apple is stuck in 2005
using namespace std::tr1;
#else
using namespace std;
#endif
namespace ControlFlow
{

View File

@ -50,13 +50,14 @@ struct Declaration
float fMaxTessFactor;
uint32_t ui32IndexRange;
uint32_t ui32GSInstanceCount;
SB_SAMPLER_MODE eSamplerMode; // For sampler declarations, the sampler mode.
struct Interface_TAG
{
uint32_t ui32InterfaceID;
uint32_t ui32NumFuncTables;
uint32_t ui32ArraySize;
} interface;
} iface;
} value;
uint32_t ui32BufferStride;

View File

@ -19,6 +19,7 @@ public:
bstring glsl;
bstring extensions;
bstring beforeMain;
bstring* currentGLSLString;//either glsl or earlyMain of current phase
@ -26,6 +27,11 @@ public:
int indent;
unsigned int flags;
// Helper functions for checking flags
// Returns true if VULKAN_BINDINGS flag is set
bool IsVulkan() const;
Shader* psShader;
GLSLCrossDependencyData* psDependencies;
const char *inputPrefix; // Prefix for shader inputs
@ -48,7 +54,8 @@ public:
bool OutputNeedsDeclaring(const Operand* psOperand, const int count);
void RequireExtension(const std::string &extName);
bool RequireExtension(const std::string &extName);
bool EnableExtension(const std::string &extName);
private:
std::set<std::string> m_EnabledExtensions;

View File

@ -9,6 +9,7 @@
#include "internal_includes/Operand.h"
class HLSLCrossCompilerContext;
struct ConstantBuffer;
namespace HLSLcc
{
@ -20,11 +21,9 @@ namespace HLSLcc
const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision = true);
const char * GetConstructorForTypeGLSL(const SHADER_VARIABLE_TYPE eType,
const int components, bool useGLSLPrecision);
const char * GetConstructorForTypeGLSL(const HLSLCrossCompilerContext *context, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision);
const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType,
const int components);
const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, const int components);
std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows);
@ -55,7 +54,9 @@ namespace HLSLcc
// Returns true if the instruction adds 1 to the destination temp register
bool IsAddOneInstruction(const Instruction *psInst);
bool CanDoDirectCast(SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest);
bool CanDoDirectCast(const HLSLCrossCompilerContext *context, SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest);
bool IsUnityFlexibleInstancingBuffer(const ConstantBuffer* psCBuf);
// Helper function to print floats with full precision
void PrintFloat(bstring b, float f);

View File

@ -2,8 +2,8 @@
#pragma once
class ShaderPhase;
class HLSLCrossCompilerContext;
namespace HLSLcc
{
void DoLoopTransform(ShaderPhase &phase);
void DoLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase);
};

View File

@ -4,10 +4,6 @@
#include <vector>
#include <memory>
#ifdef __APPLE__
#include <tr1/memory>
#endif
enum{ MAX_SUB_OPERANDS = 3 };
class Operand;
class HLSLCrossCompilerContext;
@ -21,14 +17,7 @@ struct Instruction;
class Operand
{
public:
#ifdef __APPLE__
// Herp derp Apple is stuck in 2005
typedef std::tr1::shared_ptr<Operand> SubOperandPtr;
#else
typedef std::shared_ptr<Operand> SubOperandPtr;
#endif
Operand()
:

View File

@ -145,7 +145,8 @@ public:
ui32CurrentVertexOutputStream(0),
textureSamplers(),
aui32StructuredBufferBindingPoints(MAX_RESOURCE_BINDINGS, 0),
ui32CurrentStructuredBufferIndex()
ui32CurrentStructuredBufferIndex(),
m_DummySamplerDeclared(false)
{
}
@ -257,6 +258,8 @@ public:
std::vector<char> psDoubleTempSizes; // ...and for doubles
std::vector<char> psBoolTempSizes; // ... and for bools
bool m_DummySamplerDeclared; // If true, the shader doesn't declare any samplers but uses texelFetch and we have added a dummy sampler for Vulkan for that.
private:
void DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand);

View File

@ -18,7 +18,7 @@ public:
virtual void TranslateDeclaration(const Declaration *psDecl) = 0;
// Translate system value type to name, return true if succeeded and no further translation is necessary
virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL) = 0;
virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL) = 0;
// In GLSL, the input and output names cannot clash.
// Also, the output name of previous stage must match the input name of the next stage.

View File

@ -43,11 +43,11 @@ static int HaveOverloadedTextureFuncs(const GLLang eLang)
return 1;
}
//Only enable for ES.
//Only enable for ES, Vulkan and Switch.
//Not present in 120, ignored in other desktop languages. Specifically enabled on Vulkan.
static int HavePrecisionQualifiers(const HLSLCrossCompilerContext *psContext)
{
if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0)
if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0 || (psContext->flags & HLSLCC_FLAG_NVN_TARGET) != 0)
return 1;
const GLLang eLang = psContext->psShader->eTargetLanguage;
@ -58,6 +58,12 @@ static int HavePrecisionQualifiers(const HLSLCrossCompilerContext *psContext)
return 0;
}
static int EmitLowp(const HLSLCrossCompilerContext *psContext)
{
const GLLang eLang = psContext->psShader->eTargetLanguage;
return eLang == LANG_ES_100 ? 1 : 0;
}
static int HaveCubemapArray(const GLLang eLang)
{
if (eLang >= LANG_400 && eLang <= LANG_GL_LAST)
@ -139,17 +145,68 @@ static int PixelInterpDependency(const GLLang eLang)
return 0;
}
static int HaveUVec(const GLLang eLang)
static int HaveUnsignedTypes(const GLLang eLang)
{
switch(eLang)
{
switch(eLang)
{
case LANG_ES_100:
case LANG_120:
return 0;
return 0;
default:
break;
}
return 1;
}
return 1;
}
// Returns 0 for LANG_ES_100 and LANG_120, and 1 for every other target language.
static int HaveBitEncodingOps(const GLLang eLang)
{
    if (eLang == LANG_ES_100 || eLang == LANG_120)
        return 0;

    return 1;
}
// Returns 0 for LANG_ES_100 and LANG_120, and 1 for every other target language.
static int HaveNativeBitwiseOps(const GLLang eLang)
{
    const bool isLegacyTarget = (eLang == LANG_ES_100) || (eLang == LANG_120);
    return isLegacyTarget ? 0 : 1;
}
// Tells whether dynamic indexing may be emitted for the current target.
//
// WebGL only allows dynamic indexing with constant expressions, loop indices or a combination.
// The only exception is for uniform access in vertex shaders, which can be indexed using any expression.
//
// For LANG_ES_100 and LANG_120 the answer is 1 only when an operand is supplied and it either
// carries a for-loop inductor name or is a constant buffer operand in a vertex shader.
// Every other target language returns 1 unconditionally.
static int HaveDynamicIndexing(HLSLCrossCompilerContext *psContext, const Operand* psOperand = NULL)
{
    const GLLang eLang = psContext->psShader->eTargetLanguage;

    // Targets other than ES 100 / 120 are never restricted.
    if (eLang != LANG_ES_100 && eLang != LANG_120)
        return 1;

    // Without an operand to inspect, the restricted targets get the conservative answer.
    if (psOperand == NULL)
        return 0;

    // Indexing by a loop inductor is allowed.
    if (psOperand->m_ForLoopInductorName)
        return 1;

    // Uniform (constant buffer) access in a vertex shader is allowed.
    if (psContext->psShader->eShaderType == VERTEX_SHADER && psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER)
        return 1;

    return 0;
}
static int HaveGather(const GLLang eLang)

View File

@ -9,20 +9,23 @@ class ToGLSL : public Translator
{
protected:
GLLang language;
bool m_NeedUnityInstancingArraySizeDecl;
public:
explicit ToGLSL(HLSLCrossCompilerContext *ctx) : Translator(ctx), language(LANG_DEFAULT) {}
explicit ToGLSL(HLSLCrossCompilerContext *ctx) : Translator(ctx), language(LANG_DEFAULT), m_NeedUnityInstancingArraySizeDecl(false), m_NumDeclaredWhileTrueLoops(0) {}
// Sets the target language according to given input. if LANG_DEFAULT, does autodetect and returns the selected language
GLLang SetLanguage(GLLang suggestedLanguage);
virtual bool Translate();
virtual void TranslateDeclaration(const Declaration* psDecl);
virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL);
virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL);
virtual void SetIOPrefixes();
private:
// Vulkan-only: detect which branches only depend on uniforms and immediate values and can be turned into specialization constants.
void IdentifyStaticBranches(ShaderPhase *psPhase);
void BuildStaticBranchNameForInstruction(Instruction &inst);
// May return false when we detect too complex stuff (matrices, arrays etc)
bool BuildStaticBranchNameForInstruction(Instruction &inst);
void DeclareSpecializationConstants(ShaderPhase &phase);
@ -42,14 +45,17 @@ private:
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis);
void AddAssignPrologue(int numParenthesis, bool isEmbedded = false);
void AddBuiltinOutput(const Declaration* psDecl, int arrayElements, const char* builtinName);
void AddBuiltinInput(const Declaration* psDecl, const char* builtinName);
void HandleOutputRedirect(const Declaration *psDecl, const char *Precision);
void HandleInputRedirect(const Declaration *psDecl, const char *Precision);
void AddUserOutput(const Declaration* psDecl);
void DeclareStructConstants(const uint32_t ui32BindingPoint,
const ConstantBuffer* psCBuf, const Operand* psOperand,
bstring glsl);
void DeclareStructConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, const Operand* psOperand, bstring glsl);
void DeclareConstBufferShaderVariable(const char* varName, const struct ShaderVarType* psType, const struct ConstantBuffer* psCBuf, int unsizedArray, bool addUniformPrefix = false);
void PreDeclareStructType(const std::string &name, const struct ShaderVarType* psType);
void DeclareUBOConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, bstring glsl);
typedef enum
{
@ -88,10 +94,6 @@ private:
Instruction* psInst,
const ResourceBinding* psBinding,
bstring glsl);
void TranslateTexelFetchOffset(
Instruction* psInst,
const ResourceBinding* psBinding,
bstring glsl);
void TranslateTexCoord(
const RESOURCE_DIMENSION eResDim,
Operand* psTexCoordOperand);
@ -107,8 +109,20 @@ private:
Instruction* psInst,
bstring glsl);
// Add an extra function to the m_FunctionDefinitions list, unless it's already there.
bool DeclareExtraFunction(const std::string &name, bstring body);
void UseExtraFunctionDependency(const std::string &name);
void DeclareDynamicIndexWrapper(const struct ShaderVarType* psType);
void DeclareDynamicIndexWrapper(const char* psName, SHADER_VARIABLE_CLASS eClass, SHADER_VARIABLE_TYPE eType, uint32_t ui32Rows, uint32_t ui32Columns, uint32_t ui32Elements);
bool RenderTargetDeclared(uint32_t input);
std::string GetVulkanDummySamplerName();
// A <function name, body text> map of extra helper functions we'll need.
FunctionDefinitions m_FunctionDefinitions;
std::set<uint32_t> m_DeclaredRenderTarget;
int m_NumDeclaredWhileTrueLoops;
};

View File

@ -4,79 +4,6 @@
#include <map>
#include <vector>
// Contents of one struct definition, kept as text: each member is one string.
struct StructDefinition
{
    // Starts out with no members, no dependencies and not yet printed.
    StructDefinition()
        : m_Members()
        , m_Dependencies()
        , m_IsPrinted(false)
    {
    }

    // A vector of strings with the struct members
    std::vector<std::string> m_Members;

    // A vector of struct names this struct depends on.
    std::vector<std::string> m_Dependencies;

    // Has this struct been printed out yet?
    bool m_IsPrinted;
};
typedef std::map<std::string, StructDefinition> StructDefinitions;
// Map of extra function definitions we need to add before the shader body but after the declarations.
typedef std::map<std::string, std::string> FunctionDefinitions;
// A helper class for allocating binding slots
// (because both UAVs and textures use the same slots in Metal, also constant buffers and other buffers etc)
//
// Each (register number, bind type) pair is mapped to one slot. Slots are handed out from a
// free list that initially holds 0 .. MAX_RESOURCE_BINDINGS-1, lowest number first.
class BindingSlotAllocator
{
public:
    enum BindType
    {
        ConstantBuffer = 0,
        RWBuffer,
        Texture,
        UAV
    };

    BindingSlotAllocator() : m_Allocations()
    {
        // Push the slots in descending order so that back() yields the lowest free slot.
        int slot = MAX_RESOURCE_BINDINGS;
        while (slot-- > 0)
            m_FreeSlots.push_back(slot);
    }

    // Returns the slot assigned to (regNo, type), assigning the next free slot on first use.
    // NOTE(review): there is no check for an exhausted free list before back()/pop_back();
    // callers presumably never request more than MAX_RESOURCE_BINDINGS distinct bindings - confirm.
    uint32_t GetBindingSlot(uint32_t regNo, BindType type)
    {
        const uint32_t key = MakeKey(regNo, type);

        SlotMap::iterator existing = m_Allocations.find(key);
        if (existing != m_Allocations.end())
            return existing->second;

        const uint32_t freshSlot = m_FreeSlots.back();
        m_FreeSlots.pop_back();
        m_Allocations.insert(std::make_pair(key, freshSlot));
        return freshSlot;
    }

    // Func for reserving binding slots with the original reg number.
    // Used for fragment shader UAVs (SetRandomWriteTarget etc).
    void ReserveBindingSlot(uint32_t regNo, BindType type)
    {
        // insert() leaves an already existing allocation for this key untouched.
        m_Allocations.insert(std::make_pair(MakeKey(regNo, type), regNo));

        // Remove regNo from free slots (searching from the back, first hit only).
        for (std::vector<uint32_t>::size_type idx = m_FreeSlots.size(); idx-- > 0;)
        {
            if (m_FreeSlots[idx] != regNo)
                continue;

            m_FreeSlots.erase(m_FreeSlots.begin() + idx);
            return;
        }
    }

private:
    typedef std::map<uint32_t, uint32_t> SlotMap;

    // The key is regNumber with the bindtype stored to highest 16 bits
    static uint32_t MakeKey(uint32_t regNo, BindType type)
    {
        return regNo | (uint32_t(type) << 16);
    }

    SlotMap m_Allocations;
    std::vector<uint32_t> m_FreeSlots;
};
struct SamplerDesc
{
std::string name;
@ -87,10 +14,11 @@ struct TextureSamplerDesc
std::string name;
int textureBind, samplerBind;
HLSLCC_TEX_DIMENSION dim;
bool isMultisampled;
bool isDepthSampler;
bool uav;
};
class ToMetal : public Translator
{
protected:
@ -105,7 +33,7 @@ public:
virtual bool Translate();
virtual void TranslateDeclaration(const Declaration *psDecl);
virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL);
virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL);
std::string TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL);
virtual void SetIOPrefixes();
@ -121,7 +49,9 @@ private:
// Retrieve the name of the output struct for this shader
std::string GetOutputStructName() const;
std::string GetInputStructName() const;
std::string GetCBName(const std::string& cbName) const;
void DeclareHullShaderPassthrough();
void HandleInputRedirect(const Declaration *psDecl, const std::string &typeName);
void HandleOutputRedirect(const Declaration *psDecl, const std::string &typeName);
@ -137,7 +67,7 @@ private:
void DeclareOutput(const Declaration *decl);
void PrintStructDeclarations(StructDefinitions &defs);
void PrintStructDeclarations(StructDefinitions &defs, const char *name = "");
std::string ResourceName(ResourceGroup group, const uint32_t ui32RegisterNumber);
@ -173,6 +103,8 @@ private:
int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType);
void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst,
int dest, int src0, int src1, int src2, uint32_t dataType);
void CallHelper3(const char* name, Instruction* psInst,
int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask, uint32_t ui32Flags);
void CallHelper3(const char* name, Instruction* psInst,
int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask);
void CallHelper2(const char* name, Instruction* psInst,

View File

@ -665,14 +665,6 @@ static TESSELLATOR_PARTITIONING DecodeTessPartitioning(uint32_t ui32Token)
return (TESSELLATOR_PARTITIONING)((ui32Token & 0x00003800) >> 11);
}
// Tessellator domain values, as produced by DecodeTessDomain() below.
typedef enum TESSELLATOR_DOMAIN
{
    TESSELLATOR_DOMAIN_UNDEFINED = 0,
    TESSELLATOR_DOMAIN_ISOLINE = 1,
    TESSELLATOR_DOMAIN_TRI = 2,
    TESSELLATOR_DOMAIN_QUAD = 3
} TESSELLATOR_DOMAIN;

// Extracts the tessellator domain from a token: the two bits selected by
// mask 0x00001800 (bits 11 and 12), shifted down to the range 0..3.
static TESSELLATOR_DOMAIN DecodeTessDomain(uint32_t ui32Token)
{
    const uint32_t ui32DomainMask = 0x00001800;
    const uint32_t ui32DomainShift = 11;
    const uint32_t ui32DomainBits = (ui32Token & ui32DomainMask) >> ui32DomainShift;

    return (TESSELLATOR_DOMAIN)ui32DomainBits;
}
@ -780,4 +772,17 @@ static RESINFO_RETURN_TYPE DecodeResInfoReturnType(uint32_t ui32Token)
return (RESINFO_RETURN_TYPE)((ui32Token & 0x00001800) >> 11);
}
// Sampler mode values, as produced by DecodeSamplerMode() below.
typedef enum SB_SAMPLER_MODE
{
    D3D10_SB_SAMPLER_MODE_DEFAULT = 0,
    D3D10_SB_SAMPLER_MODE_COMPARISON = 1,
    D3D10_SB_SAMPLER_MODE_MONO = 2,
} SB_SAMPLER_MODE;

// Extracts the sampler mode from a token: the two bits selected by
// mask 0x00001800 (bits 11 and 12), shifted down to the low bits.
static SB_SAMPLER_MODE DecodeSamplerMode(uint32_t ui32Token)
{
    const uint32_t ui32ModeMask = 0x00001800;
    const uint32_t ui32ModeShift = 11;
    const uint32_t ui32ModeBits = (ui32Token & ui32ModeMask) >> ui32ModeShift;

    return (SB_SAMPLER_MODE)ui32ModeBits;
}
#endif

View File

@ -564,6 +564,10 @@ void LoadShaderInfo(const uint32_t ui32MajorVersion,
psInfo->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED;
psInfo->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED;
psInfo->ui32TessInputControlPointCount = 0;
psInfo->ui32TessOutputControlPointCount = 0;
psInfo->eTessDomain = TESSELLATOR_DOMAIN_UNDEFINED;
psInfo->bEarlyFragmentTests = false;
psInfo->ui32MajorVersion = ui32MajorVersion;
psInfo->ui32MinorVersion = ui32MinorVersion;

View File

@ -17,6 +17,7 @@
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/Instruction.h"
#include "internal_includes/LoopTransform.h"
#include "UnityInstancingFlexibleArraySize.h"
#include <algorithm>
#include <sstream>
@ -98,16 +99,16 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext)
bool GL_ARB_shader_storage_buffer_object = false;
bool GL_ARB_shader_image_load_store = false;
if(psContext->psShader->ui32MajorVersion > 3 && psContext->psShader->eTargetLanguage != LANG_ES_300 && psContext->psShader->eTargetLanguage != LANG_ES_310 && !(psContext->psShader->eTargetLanguage >= LANG_330))
if(psContext->psShader->ui32MajorVersion > 3 && psContext->psShader->eTargetLanguage != LANG_ES_100 && psContext->psShader->eTargetLanguage != LANG_ES_300 && psContext->psShader->eTargetLanguage != LANG_ES_310 && !(psContext->psShader->eTargetLanguage >= LANG_330))
{
bcatcstr(extensions,"#extension GL_ARB_shader_bit_encoding : enable\n");
psContext->EnableExtension("GL_ARB_shader_bit_encoding");
}
if(!HaveCompute(psContext->psShader->eTargetLanguage))
{
if(psContext->psShader->eShaderType == COMPUTE_SHADER)
{
bcatcstr(extensions,"#extension GL_ARB_compute_shader : enable\n");
psContext->EnableExtension("GL_ARB_compute_shader");
}
if (psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED] ||
@ -126,7 +127,7 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext)
psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_CONSUME] ||
psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED])
{
bcatcstr(extensions,"#extension GL_ARB_shader_atomic_counters : enable\n");
psContext->EnableExtension("GL_ARB_shader_atomic_counters");
}
}
@ -154,7 +155,7 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext)
if (!HaveImageAtomics(psContext->psShader->eTargetLanguage))
{
if (isES)
bcatcstr(extensions, "#extension GL_OES_shader_image_atomic : enable\n");
psContext->EnableExtension("GL_OES_shader_image_atomic");
else
GL_ARB_shader_image_load_store = true;
}
@ -167,7 +168,50 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext)
psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO] ||
psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_C])
{
bcatcstr(extensions,"#extension GL_ARB_texture_gather : enable\n");
psContext->EnableExtension("GL_ARB_texture_gather");
}
}
if(IsESLanguage(psContext->psShader->eTargetLanguage))
{
if (psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX_COARSE] ||
psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX_FINE] ||
psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX] ||
psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY_COARSE] ||
psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY_FINE] ||
psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY])
{
if (psContext->psShader->eTargetLanguage < LANG_ES_300)
{
psContext->EnableExtension("GL_OES_standard_derivatives");
}
}
if (psContext->psShader->eShaderType == PIXEL_SHADER &&
(psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_L] ||
psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_C_LZ] ||
psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_D]))
{
psContext->EnableExtension("GL_EXT_shader_texture_lod");
static const int tex_sampler_type_count = 4;
static const char* tex_sampler_dim_name[tex_sampler_type_count] = {
"1D", "2D", "3D", "Cube",
};
if (psContext->psShader->eTargetLanguage == LANG_ES_100)
{
bcatcstr(extensions,"#if !defined(GL_EXT_shader_texture_lod)\n");
for (int dim = 0; dim < tex_sampler_type_count; dim++)
{
bformata(extensions, "#define texture%sLodEXT texture%s\n", tex_sampler_dim_name[dim], tex_sampler_dim_name[dim]);
if (dim == 1) // 2D
bformata(extensions, "#define texture%sProjLodEXT texture%sProj\n", tex_sampler_dim_name[dim], tex_sampler_dim_name[dim]);
}
bcatcstr(extensions,"#endif\n");
}
}
}
@ -176,7 +220,7 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext)
if(psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO_C] ||
psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO])
{
bcatcstr(extensions,"#extension GL_ARB_gpu_shader5 : enable\n");
psContext->EnableExtension("GL_ARB_gpu_shader5");
}
}
@ -184,7 +228,7 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext)
{
if(psContext->psShader->aiOpcodeUsed[OPCODE_LOD])
{
bcatcstr(extensions,"#extension GL_ARB_texture_query_lod : enable\n");
psContext->EnableExtension("GL_ARB_texture_query_lod");
}
}
@ -192,14 +236,14 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext)
{
if(psContext->psShader->aiOpcodeUsed[OPCODE_RESINFO])
{
bcatcstr(extensions,"#extension GL_ARB_texture_query_levels : enable\n");
bcatcstr(extensions, "#extension GL_ARB_shader_image_size : enable\n");
psContext->EnableExtension("GL_ARB_texture_query_levels");
psContext->EnableExtension("GL_ARB_shader_image_size");
}
}
if (psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_INFO ])
{
bcatcstr(extensions, "#extension GL_ARB_shader_texture_image_samples : enable\n");
psContext->EnableExtension("GL_ARB_shader_texture_image_samples");
}
if(!HaveImageLoadStore(psContext->psShader->eTargetLanguage))
@ -209,7 +253,7 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext)
psContext->psShader->aiOpcodeUsed[OPCODE_STORE_STRUCTURED])
{
GL_ARB_shader_image_load_store = true;
bcatcstr(extensions,"#extension GL_ARB_shader_bit_encoding : enable\n");
psContext->EnableExtension("GL_ARB_shader_bit_encoding");
}
else
if(psContext->psShader->aiOpcodeUsed[OPCODE_LD_UAV_TYPED] ||
@ -224,7 +268,7 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext)
{
if(psContext->psShader->eShaderType == GEOMETRY_SHADER)
{
bcatcstr(extensions,"#extension GL_ARB_geometry_shader : enable\n");
psContext->EnableExtension("GL_ARB_geometry_shader");
}
}
@ -232,8 +276,8 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext)
{
if(psContext->psShader->eShaderType == GEOMETRY_SHADER)
{
bcatcstr(extensions,"#extension GL_OES_geometry_shader : enable\n");
bcatcstr(extensions,"#extension GL_EXT_geometry_shader : enable\n");
psContext->EnableExtension("GL_OES_geometry_shader");
psContext->EnableExtension("GL_EXT_geometry_shader");
}
}
@ -241,39 +285,37 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext)
{
if(psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER)
{
bcatcstr(extensions,"#extension GL_OES_tessellation_shader : enable\n");
bcatcstr(extensions,"#extension GL_EXT_tessellation_shader : enable\n");
psContext->EnableExtension("GL_OES_tessellation_shader");
psContext->EnableExtension("GL_EXT_tessellation_shader");
}
}
if (GL_ARB_shader_storage_buffer_object)
bcatcstr(extensions, "#extension GL_ARB_shader_storage_buffer_object : enable\n");
psContext->EnableExtension("GL_ARB_shader_storage_buffer_object");
if (GL_ARB_shader_image_load_store)
bcatcstr(extensions, "#extension GL_ARB_shader_image_load_store : enable\n");
psContext->EnableExtension("GL_ARB_shader_image_load_store");
if(psContext->psShader->eShaderType == PIXEL_SHADER && psContext->psShader->eTargetLanguage >= LANG_120 && !HaveFragmentCoordConventions(psContext->psShader->eTargetLanguage))
{
bcatcstr(extensions,"#extension GL_ARB_fragment_coord_conventions : require\n");
psContext->RequireExtension("GL_ARB_fragment_coord_conventions");
}
if (psContext->psShader->extensions->EXT_shader_framebuffer_fetch && psContext->psShader->eShaderType == PIXEL_SHADER && psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH)
{
bcatcstr(extensions, "#ifdef GL_EXT_shader_framebuffer_fetch\n");
bcatcstr(extensions, "#extension GL_EXT_shader_framebuffer_fetch : enable\n");
bcatcstr(extensions, "#endif\n");
psContext->EnableExtension("GL_EXT_shader_framebuffer_fetch");
}
//Handle fragment shader default precision
if ((psContext->psShader->eShaderType == PIXEL_SHADER) &&
(psContext->psShader->eTargetLanguage == LANG_ES_100 || psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310))
if (psContext->psShader->eShaderType == PIXEL_SHADER &&
(psContext->psShader->eTargetLanguage == LANG_ES_100 || psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310 || (psContext->flags & HLSLCC_FLAG_NVN_TARGET)))
{
// Float default precision is patched during runtime in GlslGpuProgramGLES.cpp:PatchupFragmentShaderText()
// Except on Vulkan
if(psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS)
if((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) || (psContext->flags & HLSLCC_FLAG_NVN_TARGET))
bcatcstr(glsl, "precision highp float;\n");
else if (psContext->psShader->eTargetLanguage == LANG_ES_100)
// gles 2.0 shaders can have mediump as default if the GPU doesn't have highp support
bcatcstr(glsl, "#ifdef GL_FRAGMENT_PRECISION_HIGH\nprecision highp float;\n#else\nprecision mediump float;\n#endif\n");
// Define default int precision to highp to avoid issues on platforms that actually implement mediump
bcatcstr(glsl, "precision highp int;\n");
}
@ -540,13 +582,16 @@ bool ToGLSL::Translate()
if (psShader->extensions)
{
if(psContext->flags & HLSLCC_FLAG_NVN_TARGET)
bcatcstr(extensions, "#extension GL_ARB_separate_shader_objects : enable\n");
{
psContext->EnableExtension("GL_ARB_separate_shader_objects");
psContext->EnableExtension("GL_NV_desktop_lowp_mediump"); // This flag allow FP16 operations (mediump in GLSL)
}
if (psShader->extensions->ARB_explicit_attrib_location)
bcatcstr(extensions, "#extension GL_ARB_explicit_attrib_location : require\n");
psContext->RequireExtension("GL_ARB_explicit_attrib_location");
if (psShader->extensions->ARB_explicit_uniform_location)
bcatcstr(extensions, "#extension GL_ARB_explicit_uniform_location : require\n");
psContext->RequireExtension("GL_ARB_explicit_uniform_location");
if (psShader->extensions->ARB_shading_language_420pack)
bcatcstr(extensions, "#extension GL_ARB_shading_language_420pack : require\n");
psContext->RequireExtension("GL_ARB_shading_language_420pack");
}
psContext->ClearDependencyData();
@ -562,7 +607,7 @@ bool ToGLSL::Translate()
if (!psContext->psDependencies->m_ExtBlendModes.empty() && psShader->eShaderType == PIXEL_SHADER)
{
bcatcstr(extensions, "#extension GL_KHR_blend_equation_advanced : enable\n");
psContext->EnableExtension("GL_KHR_blend_equation_advanced");
bcatcstr(glsl, "#if GL_KHR_blend_equation_advanced\n");
for (i = 0; i < psContext->psDependencies->m_ExtBlendModes.size(); i++)
{
@ -581,8 +626,8 @@ bool ToGLSL::Translate()
psContext->DoDataTypeAnalysis(&phase);
phase.ResolveUAVProperties();
psShader->ResolveStructuredBufferBindingSlots(&phase);
phase.PruneConstArrays();
if(!psContext->IsVulkan())
phase.PruneConstArrays();
}
psShader->PruneTempRegisters();
@ -591,7 +636,7 @@ bool ToGLSL::Translate()
{
// Loop transform can only be done after the temps have been pruned
ShaderPhase &phase = psShader->asPhases[ui32Phase];
HLSLcc::DoLoopTransform(phase);
HLSLcc::DoLoopTransform(psContext, phase);
if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0)
{
@ -817,6 +862,16 @@ bool ToGLSL::Translate()
}
}
bstring beforeMain = NULL;
bstring beforeMainKeyword = NULL;
if (!HaveDynamicIndexing(psContext))
{
beforeMain = bfromcstr("");
beforeMainKeyword = bfromcstr("\n// Before Main\n\n");
psContext->beforeMain = beforeMain;
}
for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i)
{
TranslateDeclaration(&psShader->asPhases[0].psDecl[i]);
@ -827,6 +882,12 @@ bool ToGLSL::Translate()
DeclareSpecializationConstants(psShader->asPhases[0]);
}
// Search-and-replace marker string, used to inject code generated during translation that needs to go after the normal declarations and before main
if (!HaveDynamicIndexing(psContext))
{
bconcat(glsl, beforeMainKeyword);
}
bcatcstr(glsl, "void main()\n{\n");
psContext->indent++;
@ -853,15 +914,149 @@ bool ToGLSL::Translate()
bcatcstr(glsl, "}\n");
// Print out extra functions we generated, in reverse order for potential dependencies
std::for_each(m_FunctionDefinitions.rbegin(), m_FunctionDefinitions.rend(), [&extensions](const FunctionDefinitions::value_type &p)
{
bcatcstr(extensions, p.second.c_str());
bcatcstr(extensions, "\n");
});
// Concat extensions and glsl for the final shader code.
if (m_NeedUnityInstancingArraySizeDecl)
{
if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS)
{
bformata(extensions, "layout(constant_id = %d) const int %s = 2;\n", kArraySizeConstantID, UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO);
}
else
{
bcatcstr(extensions, "#ifndef " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "\n\t#define " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO " 2\n#endif\n");
}
}
bconcat(extensions, glsl);
bdestroy(glsl);
if (!HaveDynamicIndexing(psContext))
{
bstring empty = bfromcstr("");
if (beforeMain->slen > 1)
bfindreplace(extensions, beforeMainKeyword, beforeMain, 0);
else
bfindreplace(extensions, beforeMainKeyword, empty, 0);
psContext->beforeMain = NULL;
bdestroy(empty);
bdestroy(beforeMain);
bdestroy(beforeMainKeyword);
}
psContext->glsl = extensions;
glsl = NULL;
return true;
}
// Registers an extra helper function under `name`, using the text held in `body`.
// Returns true when a function with that name was already registered (in which case
// nothing is changed and `body` is not read), and false when the function was added.
bool ToGLSL::DeclareExtraFunction(const std::string &name, bstring body)
{
    const bool alreadyDeclared = m_FunctionDefinitions.count(name) != 0;

    if (!alreadyDeclared)
        m_FunctionDefinitions.insert(std::make_pair(name, (const char *) body->data));

    return alreadyDeclared;
}
// Appends to `code` three overloads of the one-argument function `func`, taking and returning
// type2, type3 and type4 respectively. Each overload applies `func` to every component
// (x, y[, z[, w]]) of its argument in place and returns the argument.
static void PrintComponentWrapper1(bstring code, const char *func, const char *type2, const char *type3, const char *type4)
{
    // Two-component overload
    bformata(code,
        "%s %s(%s a) {"
        " a.x = %s(a.x);"
        " a.y = %s(a.y);"
        " return a; }\n",
        type2, func, type2,
        func, func);

    // Three-component overload
    bformata(code,
        "%s %s(%s a) {"
        " a.x = %s(a.x);"
        " a.y = %s(a.y);"
        " a.z = %s(a.z);"
        " return a; }\n",
        type3, func, type3,
        func, func, func);

    // Four-component overload
    bformata(code,
        "%s %s(%s a) {"
        " a.x = %s(a.x);"
        " a.y = %s(a.y);"
        " a.z = %s(a.z);"
        " a.w = %s(a.w);"
        " return a; }\n",
        type4, func, type4,
        func, func, func, func);
}
// Appends to `code` three overloads of the two-argument function `func`, taking two arguments of
// type2, type3 and type4 respectively. Each overload applies `func` pairwise to the matching
// components of `a` and `b`, stores the results in `a`, and returns `a`.
static void PrintComponentWrapper2(bstring code, const char *func, const char *type2, const char *type3, const char *type4)
{
    // Two-component overload
    bformata(code,
        "%s %s(%s a, %s b) {"
        " a.x = %s(a.x, b.x);"
        " a.y = %s(a.y, b.y);"
        " return a; }\n",
        type2, func, type2, type2,
        func, func);

    // Three-component overload
    bformata(code,
        "%s %s(%s a, %s b) {"
        " a.x = %s(a.x, b.x);"
        " a.y = %s(a.y, b.y);"
        " a.z = %s(a.z, b.z);"
        " return a; }\n",
        type3, func, type3, type3,
        func, func, func);

    // Four-component overload
    bformata(code,
        "%s %s(%s a, %s b) {"
        " a.x = %s(a.x, b.x);"
        " a.y = %s(a.y, b.y);"
        " a.z = %s(a.z, b.z);"
        " a.w = %s(a.w, b.w);"
        " return a; }\n",
        type4, func, type4, type4,
        func, func, func, func);
}
// Appends to `code` a trunc() definition for `type`, implemented as sign(x)*floor(abs(x)).
static void PrintTrunc(bstring code, const char *type)
{
    bformata(code,
        "%s trunc(%s x)"
        " { return sign(x)*floor(abs(x)); }\n",
        type, type);
}
void ToGLSL::UseExtraFunctionDependency(const std::string &name)
{
if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end())
return;
bstring code = bfromcstr("");
bool match = true;
if (name == "trunc")
{
PrintTrunc(code, "float");
PrintTrunc(code, "vec2");
PrintTrunc(code, "vec3");
PrintTrunc(code, "vec4");
}
else if (name == "roundEven")
{
bformata(code, "float roundEven(float x) { float y = floor(x + 0.5); return (y - x == 0.5) ? floor(0.5*y) * 2.0 : y; }\n");
PrintComponentWrapper1(code, "roundEven", "vec2", "vec3", "vec4");
}
else if (name == "op_modi")
{
bformata(code, "const int BITWISE_BIT_COUNT = 32;\nint op_modi(int x, int y) { return x - y * (x / y); }\n");
PrintComponentWrapper2(code, "op_modi", "ivec2", "ivec3", "ivec4");
}
else if (name == "op_and")
{
UseExtraFunctionDependency("op_modi");
bformata(code, "int op_and(int a, int b) { int result = 0; int n = 1; for (int i = 0; i < BITWISE_BIT_COUNT; i++) { if ((op_modi(a, 2) == 1) && (op_modi(b, 2) == 1)) { result += n; } a = a / 2; b = b / 2; n = n * 2; if (!(a > 0 && b > 0)) { break; } } return result; }\n");
PrintComponentWrapper2(code, "op_and", "ivec2", "ivec3", "ivec4");
}
else if (name == "op_or")
{
UseExtraFunctionDependency("op_modi");
bformata(code, "int op_or(int a, int b) { int result = 0; int n = 1; for (int i = 0; i < BITWISE_BIT_COUNT; i++) { if ((op_modi(a, 2) == 1) || (op_modi(b, 2) == 1)) { result += n; } a = a / 2; b = b / 2; n = n * 2; if (!(a > 0 || b > 0)) { break; } } return result; }\n");
PrintComponentWrapper2(code, "op_or", "ivec2", "ivec3", "ivec4");
}
else if (name == "op_xor")
{
UseExtraFunctionDependency("op_and");
bformata(code, "int op_xor(int a, int b) { return (a + b - 2 * op_and(a, b)); }\n");
PrintComponentWrapper2(code, "op_xor", "ivec2", "ivec3", "ivec4");
}
else if (name == "op_shr")
{
bformata(code, "int op_shr(int a, int b) { return int(floor(float(a) / pow(2.0, float(b)))); }\n");
PrintComponentWrapper2(code, "op_shr", "ivec2", "ivec3", "ivec4");
}
else if (name == "op_shl")
{
bformata(code, "int op_shl(int a, int b) { return int(floor(float(a) * pow(2.0, float(b)))); }\n");
PrintComponentWrapper2(code, "op_shl", "ivec2", "ivec3", "ivec4");
}
else if (name == "op_not")
{
bformata(code, "int op_not(int value) { return -value - 1; }\n");
PrintComponentWrapper1(code, "op_not", "ivec2", "ivec3", "ivec4");
}
else
{
match = false;
}
if (match)
DeclareExtraFunction(name, code);
bdestroy(code);
}
void ToGLSL::DeclareSpecializationConstants(ShaderPhase &phase)
{
bstring glsl = psContext->glsl;
@ -933,7 +1128,7 @@ static void Base64Encode(const std::string &in, std::string& result)
}
void ToGLSL::BuildStaticBranchNameForInstruction(Instruction &inst)
bool ToGLSL::BuildStaticBranchNameForInstruction(Instruction &inst)
{
std::ostringstream oss;
if (!inst.m_StaticBranchCondition)
@ -966,7 +1161,13 @@ void ToGLSL::BuildStaticBranchNameForInstruction(Instruction &inst)
bcstrfree(str);
bdestroy(varname);
oss << "!=0";
Base64Encode(oss.str(), inst.m_StaticBranchName);
std::string res = oss.str();
// Sanity checks: no arrays, no matrices
if (res.find('[') != std::string::npos)
return false;
if (res.find("hlslcc_mtx") != std::string::npos)
return false;
Base64Encode(res, inst.m_StaticBranchName);
}
else
{
@ -1012,9 +1213,16 @@ void ToGLSL::BuildStaticBranchNameForInstruction(Instruction &inst)
bdestroy(res);
if(argType != SVT_BOOL)
oss << "!=0";
Base64Encode(oss.str(), inst.m_StaticBranchName);
std::string ress = oss.str();
// Sanity checks: no arrays, no matrices
if (ress.find('[') != std::string::npos)
return false;
if (ress.find("hlslcc_mtx") != std::string::npos)
return false;
Base64Encode(ress, inst.m_StaticBranchName);
}
return true;
}
@ -1030,10 +1238,12 @@ void ToGLSL::IdentifyStaticBranches(ShaderPhase *psPhase)
// Simple case, direct conditional branch
if (i.asOperands[0].eType == OPERAND_TYPE_CONSTANT_BUFFER)
{
psPhase->m_StaticBranchInstructions.push_back(&i);
i.m_IsStaticBranch = true;
i.m_StaticBranchCondition = NULL;
BuildStaticBranchNameForInstruction(i);
if (BuildStaticBranchNameForInstruction(i))
{
psPhase->m_StaticBranchInstructions.push_back(&i);
i.m_IsStaticBranch = true;
}
}
// Indirect, comparison via another instruction
if (i.asOperands[0].eType == OPERAND_TYPE_TEMP)
@ -1065,10 +1275,14 @@ void ToGLSL::IdentifyStaticBranches(ShaderPhase *psPhase)
}
if (isStatic)
{
psPhase->m_StaticBranchInstructions.push_back(&i);
i.m_IsStaticBranch = true;
i.m_StaticBranchCondition = &def;
BuildStaticBranchNameForInstruction(i);
if (BuildStaticBranchNameForInstruction(i))
{
psPhase->m_StaticBranchInstructions.push_back(&i);
i.m_IsStaticBranch = true;
}
else
i.m_StaticBranchCondition = NULL;
}
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
#include "internal_includes/toGLSLOperand.h"
#include "internal_includes/HLSLccToolkit.h"
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/languages.h"
#include "bstrlib.h"
#include "hlslcc.h"
#include "internal_includes/debug.h"
@ -13,7 +14,6 @@
#include <float.h>
#include <stdlib.h>
#include <algorithm>
using namespace HLSLcc;
@ -25,6 +25,8 @@ using namespace HLSLcc;
#endif
#endif // #ifndef fpcheck
// In case we need to fake dynamic indexing
static const char *squareBrackets[2][2] = { { "DynamicIndex(", ")" }, { "[", "]" } };
// Returns nonzero if types are just different precisions of the same underlying type
static bool AreTypesCompatible(SHADER_VARIABLE_TYPE a, uint32_t ui32TOFlag)
@ -359,7 +361,7 @@ static void printImmediate32(HLSLCrossCompilerContext *psContext, uint32_t value
int needsParenthesis = 0;
// Print floats as bit patterns.
if ((eType == SVT_FLOAT || eType == SVT_FLOAT16 || eType == SVT_FLOAT10) && psContext->psShader->ui32MajorVersion > 3 && fpcheck(*((float *)(&value))))
if ((eType == SVT_FLOAT || eType == SVT_FLOAT16 || eType == SVT_FLOAT10) && psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage) && fpcheck(*((float *)(&value))))
{
if (psContext->psShader->eTargetLanguage == LANG_METAL)
bcatcstr(glsl, "as_type<float>(");
@ -376,9 +378,18 @@ static void printImmediate32(HLSLCrossCompilerContext *psContext, uint32_t value
case SVT_INT:
case SVT_INT16:
case SVT_INT12:
// Adreno bug (happens only on android 4.* GLES3) casting unsigned representation of negative values to signed int
// results in undefined value/fails to link shader, need to print as signed decimal
if (value > 0x7fffffff && psContext->psShader->eTargetLanguage == LANG_ES_300)
bformata(glsl, "%i", (int32_t)value);
// Need special handling for anything >= uint 0x3fffffff
if (value > 0x3ffffffe)
bformata(glsl, "int(0x%Xu)", value);
else if (value > 0x3ffffffe)
{
if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage))
bformata(glsl, "int(0x%Xu)", value);
else
bformata(glsl, "0x%X", value);
}
else if(value <= 1024) // Print anything below 1024 as decimal, and hex after that
bformata(glsl, "%d", value);
else
@ -412,6 +423,77 @@ void ToGLSL::TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui
TranslateVariableNameWithMask(*psContext->currentGLSLString, psOperand, ui32TOFlag, pui32IgnoreSwizzle, ui32CompMask, piRebase);
}
// Convenience overload: reads the name, class, type, row/column counts and
// element count from psType and forwards them to the explicit-parameter
// overload of DeclareDynamicIndexWrapper.
void ToGLSL::DeclareDynamicIndexWrapper(const struct ShaderVarType* psType)
{
    const ShaderVarType& var = *psType;
    DeclareDynamicIndexWrapper(var.name.c_str(), var.Class, var.Type, var.Rows, var.Columns, var.Elements);
}
// Emits, into psContext->beforeMain, a GLSL helper function named
// "<psName>DynamicIndex(int i)" that returns element i of the array psName.
//
// The generated function has two bodies selected by UNITY_DYNAMIC_INDEX_ES2:
//  - non-zero: a plain "return psName[i];"
//  - zero:     an if / else-if chain with one branch per constant index,
//              capped at maxElemCount (256) branches, falling back to
//              element 0.
//
// psName       Name of the array being wrapped; also used as the key that
//              marks the wrapper as declared in m_FunctionDefinitions.
// eClass       Variable class; selects the return type written in front of
//              the function name (struct, matrix, vector or scalar).
// eType        Component type used to build the return type name.
// ui32Rows,
// ui32Columns  Matrix / vector dimensions.
// ui32Elements Array element count; determines how many branches are emitted.
//
// The wrapper is emitted at most once per psName. No return type is written
// for classes other than the ones handled below.
void ToGLSL::DeclareDynamicIndexWrapper(const char* psName, SHADER_VARIABLE_CLASS eClass, SHADER_VARIABLE_TYPE eType, uint32_t ui32Rows, uint32_t ui32Columns, uint32_t ui32Elements)
{
    bstring glsl = psContext->beforeMain;

    const char* suffix = "DynamicIndex";
    const uint32_t maxElemCount = 256;
    uint32_t elemCount = ui32Elements;

    // Already declared for this variable, nothing to do
    if (m_FunctionDefinitions.find(psName) != m_FunctionDefinitions.end())
        return;

    // Add a simple define that one can search and replace on devices that support dynamic indexing the usual way
    if (m_FunctionDefinitions.find(suffix) == m_FunctionDefinitions.end())
    {
        m_FunctionDefinitions.insert(std::make_pair(suffix, "#define UNITY_DYNAMIC_INDEX_ES2 0\n"));
    }

    bcatcstr(glsl, "\n");

    // Return type and function name
    if (eClass == SVC_STRUCT)
    {
        bformata(glsl, "%s_Type %s%s", psName, psName, suffix);
    }
    else if (eClass == SVC_MATRIX_COLUMNS || eClass == SVC_MATRIX_ROWS)
    {
        if (psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES)
        {
            // Translate matrices into vec4 arrays
            bformata(glsl, "%s " HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "%s%s", HLSLcc::GetConstructorForType(psContext, eType, 4), ui32Rows, ui32Columns, psName, suffix);
            // Each matrix contributes one array entry per column (or row)
            elemCount = (eClass == SVC_MATRIX_COLUMNS ? ui32Columns : ui32Rows);
            if (ui32Elements > 1)
            {
                elemCount *= ui32Elements;
            }
        }
        else
        {
            bformata(glsl, "%s %s%s", HLSLcc::GetMatrixTypeName(psContext, eType, ui32Columns, ui32Rows).c_str(), psName, suffix);
        }
    }
    else if (eClass == SVC_VECTOR && ui32Columns > 1)
    {
        bformata(glsl, "%s %s%s", HLSLcc::GetConstructorForType(psContext, eType, ui32Columns), psName, suffix);
    }
    else if ((eClass == SVC_SCALAR) || (eClass == SVC_VECTOR && ui32Columns == 1))
    {
        bformata(glsl, "%s %s%s", HLSLcc::GetConstructorForType(psContext, eType, 1), psName, suffix);
    }

    // Constant text is appended with bcatcstr so it is never interpreted as a format string
    bcatcstr(glsl, "(int i){\n");
    bcatcstr(glsl, "#if UNITY_DYNAMIC_INDEX_ES2\n");
    bformata(glsl, " return %s[i];\n", psName);
    bcatcstr(glsl, "#else\n");
    bformata(glsl, "#define d_ar %s\n", psName);
    bcatcstr(glsl, " if (i <= 0) return d_ar[0];");

    // Let's draw a line somewhere with this workaround.
    // The bound is computed once and converted to int (it is at most
    // maxElemCount) so the loop does not mix signed and unsigned operands.
    const int unrolledCount = static_cast<int>(std::min(elemCount, maxElemCount));
    for (int i = 1; i < unrolledCount; i++)
    {
        bformata(glsl, " else if (i == %d) return d_ar[%d];", i, i);
    }

    bcatcstr(glsl, "\n return d_ar[0];\n");
    bcatcstr(glsl, "#undef d_ar\n");
    bcatcstr(glsl, "#endif\n");
    bcatcstr(glsl, "}\n\n");

    // Remember that the wrapper for this variable has been emitted
    m_FunctionDefinitions.insert(std::make_pair(psName, ""));
}
void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase)
{
int numParenthesis = 0;
@ -498,7 +580,7 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
if (AreTypesCompatible(eType, ui32TOFlag) == 0)
{
if (CanDoDirectCast(eType, requestedType))
if (CanDoDirectCast(psContext, eType, requestedType) || !HaveUnsignedTypes(psContext->psShader->eTargetLanguage))
{
bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false));
numParenthesis++;
@ -635,8 +717,10 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
psContext->psShader->eShaderType == PIXEL_SHADER &&
psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH)
{
if(name == "vs_SV_Target0")
bcatcstr(glsl, "SV_Target0");
// With ES2, leave separate variable names for input
if (!WriteToFragData(psContext->psShader->eTargetLanguage) &&
name.size() == 13 && !strncmp(name.c_str(), "vs_SV_Target", 12))
bcatcstr(glsl, name.substr(3).c_str());
else
bcatcstr(glsl, name.c_str());
}
@ -674,6 +758,13 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
break;
}
case OPERAND_TYPE_OUTPUT_DEPTH:
if (psContext->psShader->eTargetLanguage == LANG_ES_100 && !psContext->EnableExtension("GL_EXT_frag_depth"))
{
bcatcstr(psContext->extensions, "#ifdef GL_EXT_frag_depth\n");
bcatcstr(psContext->extensions, "#define gl_FragDepth gl_FragDepthEXT\n");
bcatcstr(psContext->extensions, "#endif\n");
}
// fall through
case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL:
case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL:
{
@ -819,6 +910,7 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
int32_t index = -1;
std::vector<uint32_t> arrayIndices;
bool isArray = false;
bool isSubpassMS = false;
psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf);
switch(psContext->psShader->eShaderType)
@ -947,7 +1039,7 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
if (eType != SVT_INT && eType != SVT_UINT)
opFlags = TO_AUTO_BITCAST_TO_INT;
TranslateOperand(dynamicIndex, psDynIndexOp, opFlags);
TranslateOperand(dynamicIndex, psDynIndexOp, opFlags, 0x1); // We only care about the first component
}
char *tmp = bstr2cstr(dynamicIndex, '\0');
@ -966,6 +1058,10 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
bformata(glsl, "%s.", instanceName.c_str());
}
// Special hack for MSAA subpass inputs: the index is actually the sample index, so do special handling later.
if (strncmp(fullName.c_str(), "subpassLoad", 11) == 0 && fullName[fullName.length() - 1] == ',')
isSubpassMS = true;
if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS)))
{
// We'll need to add the prefix only to the last section of the name
@ -1017,6 +1113,10 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags);
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES);
// Special hack for MSAA subpass inputs: the index is actually the sample index, so do special handling later.
if (strncmp(fullName.c_str(), "subpassLoad", 11) == 0 && fullName[fullName.length() - 1] == ',')
isSubpassMS = true;
if (tmpVarType->Class == SVC_SCALAR)
{
bformata(glsl, "%s%s", instanceNamePrefix.c_str(), fullName.c_str());
@ -1056,15 +1156,23 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
else // hasImmediateStr
fullIndexOss << index;
int squareBracketType = hasDynamicIndex ? HaveDynamicIndexing(psContext, psOperand) : 1;
if (!squareBracketType)
DeclareDynamicIndexWrapper(psVarType);
if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0))
{
// Special handling for old matrix arrays
bformata(glsl, "[%s / 4]", fullIndexOss.str().c_str());
bformata(glsl, "[%s %% 4]", fullIndexOss.str().c_str());
bformata(glsl, "%%s / 4%s", squareBrackets[squareBracketType][0], fullIndexOss.str().c_str(), squareBrackets[squareBracketType][1]);
bformata(glsl, "%s%s %% 4%s", squareBrackets[squareBracketType][0], fullIndexOss.str().c_str(), squareBrackets[squareBracketType][1]);
}
else // This path is atm the default
{
bformata(glsl, "[%s]", fullIndexOss.str().c_str());
if(isSubpassMS)
bformata(glsl, "%s%s%s", " ", fullIndexOss.str().c_str(), ")");
else
bformata(glsl, "%s%s%s", squareBrackets[squareBracketType][0], fullIndexOss.str().c_str(), squareBrackets[squareBracketType][1]);
}
}
}
@ -1155,15 +1263,25 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
}
case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
{
bformata(glsl, "ImmCB_%d_%d_%d", psContext->currentPhase, psOperand->ui32RegisterNumber, psOperand->m_Rebase);
if(psOperand->m_SubOperands[0].get())
if (psContext->IsVulkan())
{
bcatcstr(glsl, "["); //Indexes must be integral. Offset is already taken care of above.
TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER);
bcatcstr(glsl, "]");
bformata(glsl, "ImmCB_%d", psContext->currentPhase);
TranslateOperandIndex(psOperand, 0);
}
else
{
int squareBracketType = HaveDynamicIndexing(psContext, psOperand);
bformata(glsl, "ImmCB_%d_%d_%d", psContext->currentPhase, psOperand->ui32RegisterNumber, psOperand->m_Rebase);
if (psOperand->m_SubOperands[0].get())
{
bformata(glsl, "%s", squareBrackets[squareBracketType][0]); //Indexes must be integral. Offset is already taken care of above.
TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER);
bformata(glsl, "%s", squareBrackets[squareBracketType][1]);
}
if (psOperand->m_Size == 1)
*pui32IgnoreSwizzle = 1;
}
if (psOperand->m_Size == 1)
*pui32IgnoreSwizzle = 1;
break;
}
case OPERAND_TYPE_INPUT_DOMAIN_POINT:
@ -1322,6 +1440,10 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
bcatcstr(glsl, "gl_ClipDistance");
*pui32IgnoreSwizzle = 1;
break;
case NAME_CULL_DISTANCE:
bcatcstr(glsl, "gl_CullDistance");
*pui32IgnoreSwizzle = 1;
break;
case NAME_VIEWPORT_ARRAY_INDEX:
bcatcstr(glsl, "gl_ViewportIndex");
*pui32IgnoreSwizzle = 1;
@ -1341,7 +1463,10 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
*pui32IgnoreSwizzle = 1;
break;
case NAME_IS_FRONT_FACE:
bcatcstr(glsl, "(gl_FrontFacing ? 0xffffffffu : uint(0))");
if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage))
bcatcstr(glsl, "(gl_FrontFacing ? 0xffffffffu : uint(0))");
else
bcatcstr(glsl, "(gl_FrontFacing ? int(1) : int(0))");
*pui32IgnoreSwizzle = 1;
break;
case NAME_PRIMITIVE_ID:
@ -1414,7 +1539,7 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
if (hasCtor && (*pui32IgnoreSwizzle == 0))
{
TranslateOperandSwizzleWithMask(psContext, psOperand, ui32CompMask, piRebase ? *piRebase : 0);
TranslateOperandSwizzleWithMask(glsl, psContext, psOperand, ui32CompMask, piRebase ? *piRebase : 0);
*pui32IgnoreSwizzle = 1;
}
@ -1423,7 +1548,13 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan
if (requestedType == SVT_UINT || requestedType == SVT_UINT16 || requestedType == SVT_UINT8)
bcatcstr(glsl, ") * 0xffffffffu");
else
bcatcstr(glsl, ") * int(0xffffffffu)");
{
if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage))
bcatcstr(glsl, ") * int(0xffffffffu)");
else
bcatcstr(glsl, ") * int(0xffffffff)");
}
numParenthesis--;
}
@ -1453,6 +1584,12 @@ void ToGLSL::TranslateOperand(bstring glsl, const Operand* psOperand, uint32_t u
ui32TOFlag &= ~(TO_AUTO_BITCAST_TO_FLOAT|TO_AUTO_BITCAST_TO_INT|TO_AUTO_BITCAST_TO_UINT);
}
if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) && (ui32TOFlag & TO_FLAG_UNSIGNED_INTEGER))
{
ui32TOFlag &= ~TO_FLAG_UNSIGNED_INTEGER;
ui32TOFlag |= TO_FLAG_INTEGER;
}
if(ui32TOFlag & TO_FLAG_NAME_ONLY)
{
TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, OPERAND_4_COMPONENT_MASK_ALL, &iRebase);
@ -1560,14 +1697,23 @@ std::string ResourceName(HLSLCrossCompilerContext* psContext, ResourceGroup grou
{
oss << name;
}
if (((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) && group == RGROUP_UAV)
if (psContext->IsVulkan() && group == RGROUP_UAV)
oss << "_origX" << ui32RegisterNumber << "X";
}
else
{
oss << "UnknownResource" << ui32RegisterNumber;
}
return oss.str();
std::string res = oss.str();
// Prefix sampler names with 'sampler' unless it already starts with it
if (group == RGROUP_SAMPLER)
{
if (strncmp(res.c_str(), "sampler", 7) != 0)
res.insert(0, "sampler");
}
return res;
}
void ResourceName(bstring targetStr, HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare)
{

View File

@ -25,10 +25,10 @@ static void PrintStructDeclaration(HLSLCrossCompilerContext *psContext, bstring
bformata(glsl, "struct %s\n{\n", sname.c_str());
psContext->indent++;
std::for_each(d.m_Members.begin(), d.m_Members.end(), [&psContext, &glsl](std::string &mem)
std::for_each(d.m_Members.begin(), d.m_Members.end(), [&psContext, &glsl](const MemberDefinitions::value_type &mem)
{
psContext->AddIndentation();
bcatcstr(glsl, mem.c_str());
bcatcstr(glsl, mem.second.c_str());
bcatcstr(glsl, ";\n");
});
@ -36,10 +36,10 @@ static void PrintStructDeclaration(HLSLCrossCompilerContext *psContext, bstring
bcatcstr(glsl, "};\n\n");
}
void ToMetal::PrintStructDeclarations(StructDefinitions &defs)
void ToMetal::PrintStructDeclarations(StructDefinitions &defs, const char *name)
{
bstring glsl = *psContext->currentGLSLString;
StructDefinition &args = defs[""];
StructDefinition &args = defs[name];
std::for_each(args.m_Dependencies.begin(), args.m_Dependencies.end(), [this, glsl, &defs](std::string &sname)
{
PrintStructDeclaration(psContext, glsl, sname, defs);
@ -47,11 +47,40 @@ void ToMetal::PrintStructDeclarations(StructDefinitions &defs)
}
// Maps a shader phase type to the label used for it in generated code.
// MAIN_PHASE and any value not listed below map to the empty string.
static const char * GetPhaseFuncName(SHADER_PHASE_TYPE eType)
{
    const char *phaseLabel = "";

    switch (eType)
    {
        case HS_GLOBAL_DECL_PHASE:
            phaseLabel = "hs_global_decls";
            break;
        case HS_FORK_PHASE:
            phaseLabel = "fork_phase";
            break;
        case HS_CTRL_POINT_PHASE:
            phaseLabel = "control_point_phase";
            break;
        case HS_JOIN_PHASE:
            phaseLabel = "join_phase";
            break;
        case MAIN_PHASE:
        default:
            break;
    }

    return phaseLabel;
}
// For every entry in the shader's input signature list, appends one indented
// assignment line to the current output string that copies
// "<inputPrefix>cp[controlPointID].<semantic><index>" into
// "<outputPrefix><semantic><index>".
static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext)
{
    bstring out = *psContext->currentGLSLString;
    const auto &inputSigs = psContext->psShader->sInfo.psInputSignatures;

    for (const ShaderInfo::InOutSignature &sig : inputSigs)
    {
        const char *semantic = sig.semanticName.c_str();

        psContext->AddIndentation();
        bformata(out, "%s%s%d = %scp[controlPointID].%s%d;\n",
            psContext->outputPrefix, semantic, sig.ui32SemanticIndex,
            psContext->inputPrefix, semantic, sig.ui32SemanticIndex);
    }
}
bool ToMetal::Translate()
{
bstring glsl;
uint32_t i;
Shader* psShader = psContext->psShader;
uint32_t ui32Phase;
psContext->psTranslator = this;
SetIOPrefixes();
@ -79,35 +108,261 @@ bool ToMetal::Translate()
psContext->ClearDependencyData();
const SHADER_PHASE_TYPE ePhaseFuncCallOrder[3] = { HS_CTRL_POINT_PHASE, HS_FORK_PHASE, HS_JOIN_PHASE };
uint32_t ui32PhaseCallIndex;
int hasControlPointPhase = 0;
const int maxThreadsPerThreadGroup = 32;
int numPatchesInThreadGroup = 0;
bool hasControlPoint = false;
bool hasPatchConstant = false;
std::string tessVertexFunctionArguments;
if ((psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0)
{
if (psContext->psDependencies)
{
m_StructDefinitions[""].m_Members = psContext->psDependencies->m_SharedFunctionMembers;
m_TextureSlots = psContext->psDependencies->m_SharedTextureSlots;
m_SamplerSlots = psContext->psDependencies->m_SharedSamplerSlots;
m_BufferSlots = psContext->psDependencies->m_SharedBufferSlots;
hasControlPoint = psContext->psDependencies->hasControlPoint;
hasPatchConstant = psContext->psDependencies->hasPatchConstant;
}
}
ClampPartialPrecisions();
ShaderPhase &phase = psShader->asPhases[0];
phase.UnvectorizeImmMoves();
psContext->DoDataTypeAnalysis(&phase);
phase.ResolveUAVProperties();
ReserveUAVBindingSlots(&phase); // TODO: unify slot allocation code between gl/metal/vulkan
phase.PruneConstArrays();
HLSLcc::DoLoopTransform(phase);
for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++)
{
ShaderPhase &phase = psShader->asPhases[ui32Phase];
phase.UnvectorizeImmMoves();
psContext->DoDataTypeAnalysis(&phase);
phase.ResolveUAVProperties();
ReserveUAVBindingSlots(&phase); // TODO: unify slot allocation code between gl/metal/vulkan
HLSLcc::DoLoopTransform(psContext, phase);
}
psShader->PruneTempRegisters();
bcatcstr(glsl, "#include <metal_stdlib>\n#include <metal_texture>\nusing namespace metal;\n");
//Special case. Can have multiple phases.
if(psShader->eShaderType == HULL_SHADER)
{
psShader->ConsolidateHullTempVars();
for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i)
TranslateDeclaration(&psShader->asPhases[0].psDecl[i]);
// Find out if we have a passthrough hull shader
for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++)
{
if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE)
hasControlPointPhase = 1;
}
}
// Output default implementations for framebuffer index remap if needed
if(m_NeedFBOutputRemapDecl)
bcatcstr(glsl, "#ifndef XLT_REMAP_O\n#define XLT_REMAP_O {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_o[] = XLT_REMAP_O;\n");
if(m_NeedFBInputRemapDecl)
bcatcstr(glsl, "#ifndef XLT_REMAP_I\n#define XLT_REMAP_I {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_i[] = XLT_REMAP_I;\n");
DeclareClipPlanes(&psShader->asPhases[0].psDecl[0], psShader->asPhases[0].psDecl.size());
GenerateTexturesReflection(&psContext->m_Reflection);
// Hull and Domain shaders get merged into vertex shader output
if (!(psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER))
{
if (psContext->flags & HLSLCC_FLAG_DISABLE_FASTMATH)
bcatcstr(glsl, "#define UNITY_DISABLE_FASTMATH\n");
bcatcstr(glsl, "#include <metal_stdlib>\n#include <metal_texture>\nusing namespace metal;\n");
bcatcstr(glsl, "\n#if !(__HAVE_FMA__)\n#define fma(a,b,c) ((a) * (b) + (c))\n#endif\n\n");
}
if (psShader->eShaderType == HULL_SHADER)
{
psContext->indent++;
// Phase 1 is always the global decls phase, no instructions
for(i=0; i < psShader->asPhases[1].psDecl.size(); ++i)
{
TranslateDeclaration(&psShader->asPhases[1].psDecl[i]);
}
if (hasControlPointPhase == 0)
{
DeclareHullShaderPassthrough();
}
for(ui32PhaseCallIndex=0; ui32PhaseCallIndex<3; ui32PhaseCallIndex++)
{
for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++)
{
ShaderPhase *psPhase = &psShader->asPhases[ui32Phase];
if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex])
continue;
psContext->currentPhase = ui32Phase;
#ifdef _DEBUG
// bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase));
#endif
for (i = 0; i < psPhase->psDecl.size(); ++i)
{
TranslateDeclaration(&psPhase->psDecl[i]);
}
}
}
psContext->indent--;
numPatchesInThreadGroup = maxThreadsPerThreadGroup / std::max(psShader->sInfo.ui32TessInputControlPointCount, psShader->sInfo.ui32TessOutputControlPointCount);
}
else
{
for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i)
TranslateDeclaration(&psShader->asPhases[0].psDecl[i]);
// Output default implementations for framebuffer index remap if needed
if (m_NeedFBOutputRemapDecl)
bcatcstr(glsl, "#ifndef XLT_REMAP_O\n\t#define XLT_REMAP_O {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_o[] = XLT_REMAP_O;\n");
if (m_NeedFBInputRemapDecl)
bcatcstr(glsl, "#ifndef XLT_REMAP_I\n\t#define XLT_REMAP_I {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_i[] = XLT_REMAP_I;\n");
DeclareClipPlanes(&psShader->asPhases[0].psDecl[0], psShader->asPhases[0].psDecl.size());
GenerateTexturesReflection(&psContext->m_Reflection);
}
if (psShader->eShaderType == HULL_SHADER)
{
psContext->currentPhase = MAIN_PHASE;
if (m_StructDefinitions["Mtl_ControlPoint"].m_Members.size() > 0)
{
hasControlPoint = true;
m_StructDefinitions["Mtl_ControlPoint"].m_Dependencies.push_back("Mtl_ControlPoint");
m_StructDefinitions["Mtl_ControlPointIn"].m_Dependencies.push_back("Mtl_ControlPointIn");
PrintStructDeclarations(m_StructDefinitions, "Mtl_ControlPoint");
PrintStructDeclarations(m_StructDefinitions, "Mtl_ControlPointIn");
}
if (m_StructDefinitions["Mtl_PatchConstant"].m_Members.size() > 0)
{
hasPatchConstant = true;
m_StructDefinitions["Mtl_PatchConstant"].m_Dependencies.push_back("Mtl_PatchConstant");
m_StructDefinitions["Mtl_PatchConstantIn"].m_Dependencies.push_back("Mtl_PatchConstantIn");
PrintStructDeclarations(m_StructDefinitions, "Mtl_PatchConstant");
PrintStructDeclarations(m_StructDefinitions, "Mtl_PatchConstantIn");
}
m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.push_back(std::make_pair("numPatches", "uint numPatches"));
m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.push_back(std::make_pair("numControlPointsPerPatch", "ushort numControlPointsPerPatch"));
if (m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.size() > 0)
{
m_StructDefinitions["Mtl_KernelPatchInfo"].m_Dependencies.push_back("Mtl_KernelPatchInfo");
PrintStructDeclarations(m_StructDefinitions, "Mtl_KernelPatchInfo");
}
if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0)
{
m_StructDefinitions[GetInputStructName()].m_Dependencies.push_back(GetInputStructName());
// Hack, we're reusing Mtl_VertexOut as a hull shader input array, so no need to declare original contents
m_StructDefinitions[GetInputStructName()].m_Members.clear();
bstring vertexOut = bfromcstr("");
bformata(vertexOut, "Mtl_VertexOut cp[%d]", psShader->sInfo.ui32TessOutputControlPointCount);
m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("cp", (const char *) vertexOut->data));
bdestroy(vertexOut);
}
if(psContext->psDependencies)
{
for (auto itr = psContext->psDependencies->m_SharedFunctionMembers.begin(); itr != psContext->psDependencies->m_SharedFunctionMembers.end(); itr++)
{
tessVertexFunctionArguments += itr->first.c_str();
tessVertexFunctionArguments += ", ";
}
}
}
if (psShader->eShaderType == DOMAIN_SHADER)
{
// For preserving data layout, reuse Mtl_ControlPoint/Mtl_PatchConstant from hull shader
if (hasControlPoint)
m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("cp", "patch_control_point<Mtl_ControlPointIn> cp"));
if (hasPatchConstant)
m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("patch", "Mtl_PatchConstantIn patch"));
}
if ((psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0)
{
if (psContext->psDependencies)
{
psContext->psDependencies->m_SharedFunctionMembers = m_StructDefinitions[""].m_Members;
psContext->psDependencies->m_SharedTextureSlots = m_TextureSlots;
psContext->psDependencies->m_SharedTextureSlots.SaveTotalShaderStageAllocationsCount();
psContext->psDependencies->m_SharedSamplerSlots = m_SamplerSlots;
psContext->psDependencies->m_SharedSamplerSlots.SaveTotalShaderStageAllocationsCount();
psContext->psDependencies->m_SharedBufferSlots = m_BufferSlots;
psContext->psDependencies->m_SharedBufferSlots.SaveTotalShaderStageAllocationsCount();
}
}
if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0)
{
m_StructDefinitions[""].m_Members.push_back(GetInputStructName() + " input [[ stage_in ]]");
if (psShader->eShaderType == HULL_SHADER)
{
m_StructDefinitions[""].m_Members.push_back(std::make_pair("vertexInput", "Mtl_VertexIn vertexInput [[ stage_in ]]"));
m_StructDefinitions[""].m_Members.push_back(std::make_pair("tID", "uint2 tID [[ thread_position_in_grid ]]"));
m_StructDefinitions[""].m_Members.push_back(std::make_pair("groupID", "ushort2 groupID [[ threadgroup_position_in_grid ]]"));
bstring buffer = bfromcstr("");
uint32_t slot = 0;
if (hasControlPoint)
{
slot = m_BufferSlots.GetBindingSlot(0xffff - 1, BindingSlotAllocator::ConstantBuffer);
bformata(buffer, "device Mtl_ControlPoint *controlPoints [[ buffer(%d) ]]", slot);
m_StructDefinitions[""].m_Members.push_back(std::make_pair("controlPoints", (const char *) buffer->data));
btrunc(buffer, 0);
}
if (hasPatchConstant)
{
slot = m_BufferSlots.GetBindingSlot(0xffff - 2, BindingSlotAllocator::ConstantBuffer);
bformata(buffer, "device Mtl_PatchConstant *patchConstants [[ buffer(%d) ]]", slot);
m_StructDefinitions[""].m_Members.push_back(std::make_pair("patchConstants", (const char *) buffer->data));
btrunc(buffer, 0);
}
slot = m_BufferSlots.GetBindingSlot(0xffff - 3, BindingSlotAllocator::ConstantBuffer);
bformata(buffer, "device %s *tessFactors [[ buffer(%d) ]]", psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "MTLQuadTessellationFactorsHalf" : "MTLTriangleTessellationFactorsHalf", slot);
m_StructDefinitions[""].m_Members.push_back(std::make_pair("tessFactors", (const char *) buffer->data));
btrunc(buffer, 0);
slot = m_BufferSlots.GetBindingSlot(0xffff - 4, BindingSlotAllocator::ConstantBuffer);
bformata(buffer, "constant Mtl_KernelPatchInfo &patchInfo [[ buffer(%d) ]]", slot);
m_StructDefinitions[""].m_Members.push_back(std::make_pair("patchInfo", (const char *) buffer->data));
btrunc(buffer, 0);
bdestroy(buffer);
}
else if (psShader->eShaderType == VERTEX_SHADER && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0)
{
m_StructDefinitions[""].m_Members.push_back(std::make_pair("input", GetInputStructName() + " input"));
}
else
{
m_StructDefinitions[""].m_Members.push_back(std::make_pair("input", GetInputStructName() + " input [[ stage_in ]]"));
}
if ((psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == HULL_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0)
{
// m_StructDefinitions is inherited between tessellation shader stages but some builtins need exceptions
std::for_each(m_StructDefinitions[""].m_Members.begin(), m_StructDefinitions[""].m_Members.end(), [&psShader](MemberDefinitions::value_type &mem)
{
if (mem.first == "mtl_InstanceID")
{
if (psShader->eShaderType == VERTEX_SHADER)
mem.second.assign("uint mtl_InstanceID");
else if (psShader->eShaderType == HULL_SHADER)
mem.second.assign("// mtl_InstanceID passed through groupID");
}
});
}
m_StructDefinitions[""].m_Dependencies.push_back(GetInputStructName());
}
@ -123,55 +378,285 @@ bool ToMetal::Translate()
psContext->currentGLSLString = &bodyglsl;
bool popPragmaDiagnostic = false;
if (psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER)
{
popPragmaDiagnostic = true;
bcatcstr(bodyglsl, "#pragma clang diagnostic push\n");
bcatcstr(bodyglsl, "#pragma clang diagnostic ignored \"-Wunused-parameter\"\n");
}
switch (psShader->eShaderType)
{
case VERTEX_SHADER:
bcatcstr(bodyglsl, "vertex Mtl_VertexOut xlatMtlMain(\n");
if ((psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) == 0)
bcatcstr(bodyglsl, "vertex Mtl_VertexOut xlatMtlMain(\n");
else
bcatcstr(bodyglsl, "static Mtl_VertexOut vertexFunction(\n");
break;
case PIXEL_SHADER:
if (psShader->sInfo.bEarlyFragmentTests)
bcatcstr(bodyglsl, "[[early_fragment_tests]]\n");
bcatcstr(bodyglsl, "fragment Mtl_FragmentOut xlatMtlMain(\n");
break;
case COMPUTE_SHADER:
bcatcstr(bodyglsl, "kernel void computeMain(\n");
break;
case HULL_SHADER:
bcatcstr(bodyglsl, "kernel void patchKernel(\n");
break;
case DOMAIN_SHADER:
{
const char *patchType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "quad" : "triangle";
uint32_t patchCount = psShader->sInfo.ui32TessOutputControlPointCount;
bformata(bodyglsl, "[[patch(%s, %d)]] vertex Mtl_VertexOutPostTess xlatMtlMain(\n", patchType, patchCount);
break;
}
default:
// Not supported
ASSERT(0);
return false;
}
psContext->indent++;
for (auto itr = m_StructDefinitions[""].m_Members.begin(); itr != m_StructDefinitions[""].m_Members.end(); itr++)
for (auto itr = m_StructDefinitions[""].m_Members.begin(); ;)
{
if (itr == m_StructDefinitions[""].m_Members.end())
break;
psContext->AddIndentation();
bcatcstr(bodyglsl, itr->c_str());
if (itr + 1 != m_StructDefinitions[""].m_Members.end())
bcatcstr(bodyglsl, itr->second.c_str());
itr++;
if (itr != m_StructDefinitions[""].m_Members.end())
bcatcstr(bodyglsl, ",\n");
}
bcatcstr(bodyglsl, ")\n{\n");
if (popPragmaDiagnostic)
bcatcstr(bodyglsl, "#pragma clang diagnostic pop\n");
if (psShader->eShaderType != COMPUTE_SHADER)
{
psContext->AddIndentation();
bcatcstr(bodyglsl, GetOutputStructName().c_str());
bcatcstr(bodyglsl, " output;\n");
if (m_StructDefinitions[GetOutputStructName().c_str()].m_Members.size() > 0)
{
psContext->AddIndentation();
bcatcstr(bodyglsl, GetOutputStructName().c_str());
bcatcstr(bodyglsl, " output;\n");
}
}
if (psContext->psShader->asPhases[0].earlyMain->slen > 1)
if (psShader->eShaderType == HULL_SHADER)
{
#ifdef _DEBUG
if (hasPatchConstant)
{
psContext->AddIndentation();
bcatcstr(bodyglsl, "Mtl_PatchConstant patch;\n");
}
psContext->AddIndentation();
bcatcstr(bodyglsl, "//--- Start Early Main ---\n");
#endif
bconcat(bodyglsl, psContext->psShader->asPhases[0].earlyMain);
#ifdef _DEBUG
bformata(bodyglsl, "const uint numPatchesInThreadGroup = %d;\n", numPatchesInThreadGroup); // Hardcoded because of threadgroup array below
psContext->AddIndentation();
bcatcstr(bodyglsl, "//--- End Early Main ---\n");
#endif
bcatcstr(bodyglsl, "const uint patchID = (tID.x / patchInfo.numControlPointsPerPatch);\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "const bool patchValid = (patchID < patchInfo.numPatches);\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "const uint mtl_InstanceID = groupID.y;\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "const uint internalPatchID = mtl_InstanceID * patchInfo.numPatches + patchID;\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "const uint patchIDInThreadGroup = (patchID % numPatchesInThreadGroup);\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "const uint controlPointID = (tID.x % patchInfo.numControlPointsPerPatch);\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "const uint internalControlPointID = (mtl_InstanceID * (patchInfo.numControlPointsPerPatch * patchInfo.numPatches)) + tID.x;\n");
psContext->AddIndentation();
bformata(bodyglsl, "threadgroup %s inputGroup[numPatchesInThreadGroup];\n", GetInputStructName().c_str());
psContext->AddIndentation();
bformata(bodyglsl, "threadgroup %s &input = inputGroup[patchIDInThreadGroup];\n", GetInputStructName().c_str());
psContext->AddIndentation();
std::string tessFactorBufferType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "MTLQuadTessellationFactorsHalf" : "MTLTriangleTessellationFactorsHalf";
bformata(bodyglsl, "%s tessFactor;\n", tessFactorBufferType.c_str());
}
for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i)
// There are cases when there are no control point phases and we have to do passthrough
if (psShader->eShaderType == HULL_SHADER && hasControlPointPhase == 0)
{
TranslateInstruction(&psShader->asPhases[0].psInst[i]);
psContext->AddIndentation();
bcatcstr(bodyglsl, "if (patchValid) {\n");
psContext->indent++;
// Passthrough control point phase, run the rest only once per patch
psContext->AddIndentation();
bformata(bodyglsl, "input.cp[controlPointID] = vertexFunction(%svertexInput);\n", tessVertexFunctionArguments.c_str());
DoHullShaderPassthrough(psContext);
psContext->indent--;
psContext->AddIndentation();
bcatcstr(bodyglsl, "}\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "threadgroup_barrier(mem_flags::mem_threadgroup);\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "if (!patchValid) {\n");
psContext->indent++;
psContext->AddIndentation();
bcatcstr(bodyglsl, "return;\n");
psContext->indent--;
psContext->AddIndentation();
bcatcstr(bodyglsl, "}\n");
}
if (psShader->eShaderType == HULL_SHADER)
{
for(ui32PhaseCallIndex=0; ui32PhaseCallIndex<3; ui32PhaseCallIndex++)
{
for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++)
{
uint32_t i;
ShaderPhase *psPhase = &psShader->asPhases[ui32Phase];
if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex])
continue;
psContext->currentPhase = ui32Phase;
if (psPhase->earlyMain->slen > 1)
{
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(bodyglsl, "//--- Start Early Main ---\n");
#endif
bconcat(bodyglsl, psPhase->earlyMain);
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(bodyglsl, "//--- End Early Main ---\n");
#endif
}
psContext->AddIndentation();
bformata(bodyglsl, "// %s%d\n", GetPhaseFuncName(psShader->asPhases[ui32Phase].ePhase), ui32Phase);
if (psPhase->ui32InstanceCount > 1)
{
psContext->AddIndentation();
bformata(bodyglsl, "for (int phaseInstanceID = 0; phaseInstanceID < %d; phaseInstanceID++) {\n", psPhase->ui32InstanceCount);
psContext->indent++;
}
else
{
if (psContext->currentPhase == HS_CTRL_POINT_PHASE && hasControlPointPhase == 1)
{
psContext->AddIndentation();
bcatcstr(bodyglsl, "if (patchValid) {\n");
psContext->indent++;
psContext->AddIndentation();
bformata(bodyglsl, "input.cp[controlPointID] = vertexFunction(%svertexInput);\n", tessVertexFunctionArguments.c_str());
}
else
{
psContext->AddIndentation();
bcatcstr(bodyglsl, "{\n");
psContext->indent++;
}
}
if (psPhase->psInst.size() > 0)
{
//The minus one here is to remove the return statement at the end of phases.
//We don't want to translate that, we'll just end the function body.
ASSERT(psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_RET);
for (i = 0; i < psPhase->psInst.size() - 1; ++i)
{
TranslateInstruction(&psPhase->psInst[i]);
}
}
psContext->indent--;
psContext->AddIndentation();
bformata(bodyglsl, "}\n");
if (psPhase->hasPostShaderCode)
{
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(bodyglsl, "//--- Post shader code ---\n");
#endif
bconcat(bodyglsl, psPhase->postShaderCode);
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(bodyglsl, "//--- End post shader code ---\n");
#endif
}
if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE)
{
// We're done printing control point phase, run the rest only once per patch
psContext->AddIndentation();
bcatcstr(bodyglsl, "threadgroup_barrier(mem_flags::mem_threadgroup);\n");
psContext->AddIndentation();
bcatcstr(bodyglsl, "if (!patchValid) {\n");
psContext->indent++;
psContext->AddIndentation();
bcatcstr(bodyglsl, "return;\n");
psContext->indent--;
psContext->AddIndentation();
bcatcstr(bodyglsl, "}\n");
}
}
}
if (hasControlPoint)
{
psContext->AddIndentation();
bcatcstr(bodyglsl, "controlPoints[internalControlPointID] = output;\n");
}
psContext->AddIndentation();
bcatcstr(bodyglsl, "tessFactors[internalPatchID] = tessFactor;\n");
if (hasPatchConstant)
{
psContext->AddIndentation();
bcatcstr(bodyglsl, "patchConstants[internalPatchID] = patch;\n");
}
if(psContext->psDependencies)
{
//Save partitioning and primitive type for use by domain shader.
psContext->psDependencies->eTessOutPrim = psShader->sInfo.eTessOutPrim;
psContext->psDependencies->eTessPartitioning = psShader->sInfo.eTessPartitioning;
psContext->psDependencies->numPatchesInThreadGroup = numPatchesInThreadGroup;
psContext->psDependencies->hasControlPoint = hasControlPoint;
psContext->psDependencies->hasPatchConstant = hasPatchConstant;
}
}
else
{
if (psContext->psShader->asPhases[0].earlyMain->slen > 1)
{
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(bodyglsl, "//--- Start Early Main ---\n");
#endif
bconcat(bodyglsl, psContext->psShader->asPhases[0].earlyMain);
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(bodyglsl, "//--- End Early Main ---\n");
#endif
}
for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i)
{
TranslateInstruction(&psShader->asPhases[0].psInst[i]);
}
}
psContext->indent--;
@ -179,7 +664,60 @@ bool ToMetal::Translate()
bcatcstr(bodyglsl, "}\n");
psContext->currentGLSLString = &glsl;
if(psShader->eShaderType == HULL_SHADER && psContext->psDependencies)
{
psContext->m_Reflection.OnTessellationKernelInfo(psContext->psDependencies->m_SharedBufferSlots.SaveTotalShaderStageAllocationsCount());
}
if(psShader->eShaderType == DOMAIN_SHADER && psContext->psDependencies)
{
int mtlTessellationPartitionMode = -1;
int mtlWinding = -1;
switch (psContext->psDependencies->eTessPartitioning)
{
case TESSELLATOR_PARTITIONING_INTEGER:
mtlTessellationPartitionMode = 1; // MTLTessellationPartitionModeInteger
break;
case TESSELLATOR_PARTITIONING_POW2:
mtlTessellationPartitionMode = 0; // MTLTessellationPartitionModePow2
break;
case TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
mtlTessellationPartitionMode = 2; // MTLTessellationPartitionModeFractionalOdd
break;
case TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
mtlTessellationPartitionMode = 3; // MTLTessellationPartitionModeFractionalEven
break;
case TESSELLATOR_PARTITIONING_UNDEFINED:
default:
ASSERT(0);
break;
}
switch (psContext->psDependencies->eTessOutPrim)
{
case TESSELLATOR_OUTPUT_TRIANGLE_CW:
mtlWinding = 0; // MTLWindingClockwise
break;
case TESSELLATOR_OUTPUT_TRIANGLE_CCW:
mtlWinding = 1; // MTLWindingCounterClockwise
break;
case TESSELLATOR_OUTPUT_POINT:
psContext->m_Reflection.OnDiagnostics("Metal Tessellation: outputtopology(\"point\") not supported.", 0, true);
break;
case TESSELLATOR_OUTPUT_LINE:
psContext->m_Reflection.OnDiagnostics("Metal Tessellation: outputtopology(\"line\") not supported.", 0, true);
break;
case TESSELLATOR_OUTPUT_UNDEFINED:
default:
ASSERT(0);
break;
}
psContext->m_Reflection.OnTessellationInfo(mtlTessellationPartitionMode, mtlWinding, (uint32_t) psContext->psDependencies->fMaxTessFactor, psContext->psDependencies->numPatchesInThreadGroup);
}
bcatcstr(glsl, m_ExtraGlobalDefinitions.c_str());
// Print out extra functions we generated
@ -212,6 +750,13 @@ std::string ToMetal::GetOutputStructName() const
return "Mtl_VertexOut";
case PIXEL_SHADER:
return "Mtl_FragmentOut";
case HULL_SHADER:
if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_FORK_PHASE ||
psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_JOIN_PHASE)
return "Mtl_PatchConstant";
return "Mtl_ControlPoint";
case DOMAIN_SHADER:
return "Mtl_VertexOutPostTess";
default:
ASSERT(0);
return "";
@ -228,17 +773,50 @@ std::string ToMetal::GetInputStructName() const
return "Mtl_FragmentIn";
case COMPUTE_SHADER:
return "Mtl_KernelIn";
case HULL_SHADER:
return "Mtl_HullIn";
case DOMAIN_SHADER:
return "Mtl_VertexInPostTess";
default:
ASSERT(0);
return "";
}
}
// Returns the name to use for the constant buffer `cbName` in the generated source.
// Names that do not begin with '$' are returned unchanged. For '$'-prefixed names
// (e.g. "$Globals") the leading character is rewritten per shader stage so that the
// same buffer gets a different name in different shaders and CbKey can discretely
// identify a CB:
//   vertex / hull / domain : '$' is replaced by 'V'
//   pixel                  : '$' is replaced by 'F'
//   compute                : '$' is dropped
// Any other shader type trips ASSERT(0) and the name is returned unmodified.
std::string ToMetal::GetCBName(const std::string& cbName) const
{
    if (cbName[0] != '$')
        return cbName;

    std::string renamed(cbName);
    switch (psContext->psShader->eShaderType)
    {
        case COMPUTE_SHADER:
            // Strip the '$' entirely.
            renamed.erase(0, 1);
            break;
        case PIXEL_SHADER:
            renamed[0] = 'F';
            break;
        case VERTEX_SHADER:
        case HULL_SHADER:
        case DOMAIN_SHADER:
            renamed[0] = 'V';
            break;
        default:
            ASSERT(0);
            break;
    }
    return renamed;
}
void ToMetal::SetIOPrefixes()
{
switch (psContext->psShader->eShaderType)
{
case VERTEX_SHADER:
case HULL_SHADER:
case DOMAIN_SHADER:
psContext->inputPrefix = "input.";
psContext->outputPrefix = "output.";
break;

File diff suppressed because it is too large Load Diff

View File

@ -246,7 +246,7 @@ void ToMetal::AddComparison(Instruction* psInst, ComparisonType eType,
glsl << TranslateOperand(&psInst->asOperands[2], typeFlag, destMask);
if (!isBoolDest)
{
bcatcstr(glsl, ") ? 0xFFFFFFFFu : 0u");
bcatcstr(glsl, ") ? 0xFFFFFFFFu : 0u");
}
AddAssignPrologue(needsParenthesis);
}
@ -361,6 +361,25 @@ void ToMetal::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand
// TODO: We can actually do this in one op using mix().
int srcElem = -1;
SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext);
// Use an extra temp if dest is also one of the sources. Without this some swizzle combinations
// might alter the source before all components are handled.
const char* tempName = "hlslcc_movcTemp";
bool dstIsSrc1 = (pDest->eType == src1->eType) && (pDest->ui32RegisterNumber == src1->ui32RegisterNumber);
bool dstIsSrc2 = (pDest->eType == src2->eType) && (pDest->ui32RegisterNumber == src2->ui32RegisterNumber);
if (dstIsSrc1 || dstIsSrc2)
{
psContext->AddIndentation();
bcatcstr(glsl, "{\n");
++psContext->indent;
psContext->AddIndentation();
int numComponents = (pDest->eType == OPERAND_TYPE_TEMP) ?
psContext->psShader->GetTempComponentCount(eDestType, pDest->ui32RegisterNumber) :
pDest->iNumComponents;
bformata(glsl, "%s %s = %s;\n", HLSLcc::GetConstructorForType(psContext, eDestType, numComponents), tempName, TranslateOperand(pDest, TO_FLAG_NAME_ONLY).c_str());
}
for (destElem = 0; destElem < 4; ++destElem)
{
int numParenthesis = 0;
@ -391,12 +410,27 @@ void ToMetal::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand
}
}
glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType), 1 << srcElem);
if (!dstIsSrc1)
glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType), 1 << srcElem);
else
bformata(glsl, "%s%s", tempName, TranslateOperandSwizzle(src1, 1 << srcElem, 0).c_str());
bcatcstr(glsl, " : ");
glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType), 1 << srcElem);
if (!dstIsSrc2)
glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType), 1 << srcElem);
else
bformata(glsl, "%s%s", tempName, TranslateOperandSwizzle(src2, 1 << srcElem, 0).c_str());
AddAssignPrologue(numParenthesis);
}
if (dstIsSrc1 || dstIsSrc2)
{
--psContext->indent;
psContext->AddIndentation();
bcatcstr(glsl, "}\n");
}
}
}
@ -484,9 +518,8 @@ void ToMetal::CallTernaryOp(const char* op1, const char* op2, Instruction* psIns
}
void ToMetal::CallHelper3(const char* name, Instruction* psInst,
int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask)
int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask, uint32_t ui32Flags)
{
uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT;
bstring glsl = *psContext->currentGLSLString;
uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL;
uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask);
@ -521,6 +554,12 @@ void ToMetal::CallHelper3(const char* name, Instruction* psInst,
AddAssignPrologue(numParenthesis);
}
void ToMetal::CallHelper3(const char* name, Instruction* psInst,
int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask)
{
CallHelper3(name, psInst, dest, src0, src1, src2, paramsShouldFollowWriteMask, TO_AUTO_BITCAST_TO_FLOAT);
}
void ToMetal::CallHelper2(const char* name, Instruction* psInst,
int dest, int src0, int src1, int paramsShouldFollowWriteMask)
{
@ -850,9 +889,9 @@ void ToMetal::TranslateTexCoord(
opMask = OPERAND_4_COMPONENT_MASK_X;
bstring glsl = *psContext->currentGLSLString;
glsl << TranslateOperand(psTexCoordOperand, flags, opMask);
bcatcstr(glsl, ", round(");
opMask = OPERAND_4_COMPONENT_MASK_Y;
flags = TO_AUTO_BITCAST_TO_FLOAT;
isArray = true;
@ -878,10 +917,10 @@ void ToMetal::TranslateTexCoord(
// xy for coord, z for array element
opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y;
flags |= TO_AUTO_EXPAND_TO_VEC2;
bstring glsl = *psContext->currentGLSLString;
glsl << TranslateOperand(psTexCoordOperand, flags, opMask);
bcatcstr(glsl, ", round(");
opMask = OPERAND_4_COMPONENT_MASK_Z;
@ -894,12 +933,12 @@ void ToMetal::TranslateTexCoord(
// xyz for coord, w for array element
opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z;
flags |= TO_AUTO_EXPAND_TO_VEC3;
bstring glsl = *psContext->currentGLSLString;
glsl << TranslateOperand(psTexCoordOperand, flags, opMask);
bcatcstr(glsl, ", round(");
opMask = OPERAND_4_COMPONENT_MASK_W;
flags = TO_AUTO_BITCAST_TO_FLOAT;
isArray = true;
@ -915,7 +954,7 @@ void ToMetal::TranslateTexCoord(
//FIXME detect when integer coords are needed.
bstring glsl = *psContext->currentGLSLString;
glsl << TranslateOperand(psTexCoordOperand, flags, opMask);
if (isArray)
bcatcstr(glsl, ")");
@ -948,7 +987,7 @@ void ToMetal::GetResInfoData(Instruction* psInst, int index, int destElem)
bcatcstr(glsl, "1.0f / float(");
numParenthesis++;
}
glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE);
glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NAME_ONLY);
if ((index == 1 && psInst->eResDim == RESOURCE_DIMENSION_TEXTURE1DARRAY) ||
(index == 2 && (psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DARRAY ||
psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DMSARRAY)))
@ -958,13 +997,13 @@ void ToMetal::GetResInfoData(Instruction* psInst, int index, int destElem)
else
{
bcatcstr(glsl, metalGetters[index]);
if (index < 3)
{
if (psInst->eResDim != RESOURCE_DIMENSION_TEXTURE2DMS &&
psInst->eResDim != RESOURCE_DIMENSION_TEXTURE2DMSARRAY)
glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); //mip level
bcatcstr(glsl, ")");
}
}
@ -1073,13 +1112,23 @@ void ToMetal::TranslateTextureSample(Instruction* psInst,
SHADER_VARIABLE_TYPE dataType = psContext->psShader->sInfo.GetTextureDataType(psSrcTex->ui32RegisterNumber);
psContext->AddIndentation();
AddAssignToDest(psDest, dataType, psSrcTex->GetNumSwizzleElements(), &numParenthesis);
std::string texName = TranslateOperand(psSrcTex, TO_FLAG_NAME_ONLY);
// TextureName.FuncName(
glsl << texName;
bformata(glsl, ".%s(", funcName);
bool isDepthSampler = false;
for(unsigned j = 0, m = m_Textures.size() ; j < m ; ++j)
{
if(m_Textures[j].name == texName)
{
isDepthSampler = m_Textures[j].isDepthSampler;
break;
}
}
// Sampler name
//TODO: Is it ok to use fixed shadow sampler in all cases of depth compare or would we need more
// accurate way of detecting shadow cases (atm all depth compares are interpreted as shadow usage)
@ -1143,8 +1192,8 @@ void ToMetal::TranslateTextureSample(Instruction* psInst,
}
bool hadOffset = false;
// Add offset param
// Add offset param
if (psInst->bAddressOffset)
{
hadOffset = true;
@ -1178,7 +1227,7 @@ void ToMetal::TranslateTextureSample(Instruction* psInst,
mask |= OPERAND_4_COMPONENT_MASK_Y;
if (ui32NumOffsets > 2)
mask |= OPERAND_4_COMPONENT_MASK_Z;
bcatcstr(glsl, ",");
glsl << TranslateOperand(psSrcOff, TO_FLAG_INTEGER, mask);
}
@ -1191,7 +1240,7 @@ void ToMetal::TranslateTextureSample(Instruction* psInst,
{
if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE))
{
// Need to add offset param to match func overload
// Need to add offset param to match func overload
if (!hadOffset)
{
if (ui32NumOffsets == 1)
@ -1199,7 +1248,7 @@ void ToMetal::TranslateTextureSample(Instruction* psInst,
else
bformata(glsl, ", int%d(0)", ui32NumOffsets);
}
bcatcstr(glsl, ", component::");
glsl << TranslateOperandSwizzle(psSrcSamp, OPERAND_4_COMPONENT_MASK_ALL, 0, false);
}
@ -1212,7 +1261,7 @@ void ToMetal::TranslateTextureSample(Instruction* psInst,
bcatcstr(glsl, ")");
if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) || (ui32Flags & TEXSMP_FLAG_GATHER))
if (!((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) || isDepthSampler) || (ui32Flags & TEXSMP_FLAG_GATHER))
{
// iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms
// does not make sense. But need to re-enable to correctly swizzle this particular instruction.
@ -1232,7 +1281,7 @@ void ToMetal::TranslateDynamicComponentSelection(const ShaderVarType* psVarType,
bstring glsl = *psContext->currentGLSLString;
ASSERT(psVarType->Class == SVC_VECTOR);
bcatcstr(glsl, "["); // Access vector component with [] notation
bcatcstr(glsl, "["); // Access vector component with [] notation
if (offset > 0)
bcatcstr(glsl, "(");
@ -1290,7 +1339,7 @@ void ToMetal::TranslateShaderStorageStore(Instruction* psInst)
{
psContext->AddIndentation();
glsl << TranslateOperand(psDest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY);
if (psDestAddr)
{
bcatcstr(glsl, "[");
@ -1427,7 +1476,7 @@ void ToMetal::TranslateShaderStorageLoad(Instruction* psInst)
bcatcstr(glsl, "u");
}
bcatcstr(glsl, "]");
if (addedBitcast)
bcatcstr(glsl, ")");
}
@ -1780,7 +1829,7 @@ void ToMetal::TranslateAtomicMemOp(Instruction* psInst)
glsl << TranslateOperand(dest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY);
bcatcstr(glsl, "[");
glsl << TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_X);
if (!psBinding || psBinding->eType != RTYPE_UAV_RWTYPED)
{
// Structured buf if we have both x & y swizzles. Raw buf has only x -> no .value[]
@ -2026,7 +2075,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
psContext->AddIndentation();
bcatcstr(glsl, "//MAD\n");
#endif
CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, TO_FLAG_NONE);
CallHelper3("fma", psInst, 0, 1, 2, 3, 1);
break;
}
case OPCODE_IMAD:
@ -2045,6 +2094,16 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, ui32Flags);
break;
}
case OPCODE_DFMA:
{
uint32_t ui32Flags = TO_FLAG_DOUBLE;
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(glsl, "//DFMA\n");
#endif
CallHelper3("fma", psInst, 0, 1, 2, 3, 1, ui32Flags);
break;
}
case OPCODE_DADD:
{
#ifdef _DEBUG
@ -2992,24 +3051,27 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
psContext->AddIndentation();
bcatcstr(glsl, "//SYNC\n");
#endif
const char *barrierFlags = "mem_none";
if (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY)
{
barrierFlags = "mem_threadgroup";
}
if (ui32SyncFlags & (SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP | SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL))
{
barrierFlags = "mem_device";
if (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY)
{
barrierFlags = "mem_device_and_threadgroup";
}
}
psContext->AddIndentation();
const bool sync_threadgroup = (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY) != 0;
const bool sync_device = (ui32SyncFlags & (SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP | SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL)) != 0;
const char* barrierFlags = "mem_flags::mem_none";
if(sync_threadgroup && sync_device) barrierFlags = "mem_flags::mem_threadgroup | mem_flags::mem_device";
else if(sync_threadgroup) barrierFlags = "mem_flags::mem_threadgroup";
else if(sync_device) barrierFlags = "mem_flags::mem_device";
if (ui32SyncFlags & SYNC_THREADS_IN_GROUP)
bformata(glsl, "threadgroup_barrier(mem_flags::%s);\n", barrierFlags);
{
psContext->AddIndentation();
bformata(glsl, "threadgroup_barrier(%s);\n", barrierFlags);
}
else
bformata(glsl, "simdgroup_barrier(mem_flags::%s);\n", barrierFlags);
{
psContext->AddIndentation(); bformata(glsl, "#if __HAVE_SIMDGROUP_BARRIER__\n");
psContext->AddIndentation(); bformata(glsl, "simdgroup_barrier(%s);\n", barrierFlags);
psContext->AddIndentation(); bformata(glsl, "#else\n");
psContext->AddIndentation(); bformata(glsl, "threadgroup_barrier(%s);\n", barrierFlags);
psContext->AddIndentation(); bformata(glsl, "#endif\n");
}
break;
}
@ -3107,7 +3169,7 @@ void ToMetal::TranslateInstruction(Instruction* psInst)
#endif
psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, &psBinding);
if (psInst->eResDim == RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf
{
psInst->eOpcode = OPCODE_LD_UAV_TYPED;
@ -3586,75 +3648,53 @@ template <int N> vec<int, N> bitFieldExtractI(const vec<uint, N> width, const ve
}
case OPCODE_F32TOF16:
{
// TODO Metallize
ASSERT(0); // Are these even used?
const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements();
const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements();
uint32_t destElem;
uint32_t writeMask = psInst->asOperands[0].GetAccessMask();
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(glsl, "//F32TOF16\n");
psContext->AddIndentation();
bcatcstr(glsl, "//F32TOF16\n");
#endif
for (destElem = 0; destElem < destElemCount; ++destElem)
{
const char* swizzle[] = { ".x", ".y", ".z", ".w" };
//unpackHalf2x16 converts two f16s packed into uint to two f32s.
for (int i = 0; i < 4; i++)
{
if ((writeMask & (1 << i)) == 0)
continue;
psContext->AddIndentation();
psInst->asOperands[0].ui32CompMask = (1 << i);
psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis);
//dest.swiz.x = unpackHalf2x16(src.swiz.x).x
//dest.swiz.y = unpackHalf2x16(src.swiz.y).x
//dest.swiz.z = unpackHalf2x16(src.swiz.z).x
//dest.swiz.w = unpackHalf2x16(src.swiz.w).x
psContext->AddIndentation();
glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION);
if (destElemCount > 1)
bcatcstr(glsl, swizzle[destElem]);
bcatcstr(glsl, " = unpackHalf2x16(");
glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER);
if (s0ElemCount > 1)
bcatcstr(glsl, swizzle[destElem]);
bcatcstr(glsl, ").x;\n");
}
break;
bcatcstr(glsl, "as_type<uint>(half2(");
glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, (1 << i));
bcatcstr(glsl, ", 0.0))");
AddAssignPrologue(numParenthesis);
}
break;
}
case OPCODE_F16TOF32:
{
// TODO metallize
ASSERT(0); // Are these even used?
const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements();
const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements();
uint32_t destElem;
uint32_t writeMask = psInst->asOperands[0].GetAccessMask();
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(glsl, "//F16TOF32\n");
psContext->AddIndentation();
bcatcstr(glsl, "//F16TOF32\n");
#endif
for (destElem = 0; destElem < destElemCount; ++destElem)
{
const char* swizzle[] = { ".x", ".y", ".z", ".w" };
//packHalf2x16 converts two f32s to two f16s packed into a uint.
for (int i = 0; i < 4; i++)
{
if ((writeMask & (1 << i)) == 0)
continue;
psContext->AddIndentation();
psInst->asOperands[0].ui32CompMask = (1 << i);
psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, &numParenthesis);
//dest.swiz.x = packHalf2x16(vec2(src.swiz.x)) & 0xFFFF
//dest.swiz.y = packHalf2x16(vec2(src.swiz.y)) & 0xFFFF
//dest.swiz.z = packHalf2x16(vec2(src.swiz.z)) & 0xFFFF
//dest.swiz.w = packHalf2x16(vec2(src.swiz.w)) & 0xFFFF
psContext->AddIndentation();
glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_UNSIGNED_INTEGER);
if (destElemCount > 1)
bcatcstr(glsl, swizzle[destElem]);
bcatcstr(glsl, " = packHalf2x16(vec2(");
glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE);
if (s0ElemCount > 1)
bcatcstr(glsl, swizzle[destElem]);
bcatcstr(glsl, ")) & 0xFFFF;\n");
}
break;
bcatcstr(glsl, "as_type<half2>(");
glsl << TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_UINT, (1 << i));
bcatcstr(glsl, ").x");
AddAssignPrologue(numParenthesis);
}
break;
}
case OPCODE_INEG:
{
@ -3783,7 +3823,7 @@ template <int N> vec<int, N> bitFieldExtractI(const vec<uint, N> width, const ve
{
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(glsl, "//INOT\n");
bcatcstr(glsl, "//NOT\n");
#endif
psContext->AddIndentation();
AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis);
@ -3830,7 +3870,7 @@ template <int N> vec<int, N> bitFieldExtractI(const vec<uint, N> width, const ve
psContext->m_Reflection.OnDiagnostics("Metal shading language does not support buffer size query from shader. Pass the size to shader as const instead.\n", 0, false); // TODO: change this into error after modifying gfx-test 450
break;
}
case OPCODE_SAMPLE_INFO:
{
#ifdef _DEBUG
@ -3858,7 +3898,6 @@ template <int N> vec<int, N> bitFieldExtractI(const vec<uint, N> width, const ve
case OPCODE_DTOF:
case OPCODE_FTOD:
case OPCODE_DDIV:
case OPCODE_DFMA:
case OPCODE_DRCP:
case OPCODE_MSAD:
case OPCODE_DTOI:

View File

@ -7,7 +7,6 @@
#include "internal_includes/toMetal.h"
#include <cmath>
#include <sstream>
#include <algorithm>
#include <float.h>
#include <stdlib.h>
@ -331,6 +330,16 @@ static std::string printImmediate32(uint32_t value, SHADER_VARIABLE_TYPE eType)
return oss.str();
}
// Builds the fully qualified name of a constant buffer member from the buffer prefix
// (`cbName`, which the caller passes with its trailing '.') and the member's full name.
//
// For a Unity instancing buffer the struct type name is collapsed away:
//   "CBufferName.StructTypeName[] -> CBufferName[]"  (see ToMetal::DeclareConstantBuffer)
// That rewrite applies only when all of these hold: `isUnityInstancingBuffer` is set,
// `cbName` ends with '.', and `fullName` contains a '['. The result is then `cbName`
// without its trailing '.' followed by `fullName` from its first '[' onwards.
// In every other case the two strings are simply concatenated.
static std::string MakeCBVarName(const std::string &cbName, const std::string &fullName, bool isUnityInstancingBuffer)
{
    if (isUnityInstancingBuffer)
    {
        const std::string::size_type bracketPos = fullName.find('[');
        const bool endsWithDot = !cbName.empty() && cbName[cbName.size() - 1] == '.';
        if (endsWithDot && bracketPos != std::string::npos)
        {
            // Prefix minus the trailing '.', then everything from the first '['.
            std::string collapsed(cbName, 0, cbName.size() - 1);
            collapsed.append(fullName, bracketPos, std::string::npos);
            return collapsed;
        }
    }
    return cbName + fullName;
}
std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase)
{
std::ostringstream oss;
@ -402,7 +411,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
bool bitcast = false;
if (AreTypesCompatibleMetal(eType, ui32TOFlag) == 0)
{
if (CanDoDirectCast(eType, requestedType))
if (CanDoDirectCast(psContext, eType, requestedType))
{
oss << GetConstructorForType(psContext, requestedType, requestedComponents, false) << "(";
numParenthesis++;
@ -490,19 +499,15 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
{
const ShaderInfo::InOutSignature *psSig = NULL;
psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig);
if ((psSig->eSystemValueType == NAME_POSITION && psSig->ui32SemanticIndex == 0) ||
(psSig->semanticName == "POS" && psSig->ui32SemanticIndex == 0) ||
(psSig->semanticName == "SV_POSITION" && psSig->ui32SemanticIndex == 0))
if (psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER)
{
// Shouldn't happen on Metal?
ASSERT(0);
break;
// bcatcstr(glsl, "gl_in");
// TranslateOperandIndex(psOperand, 0);//Vertex index
// bcatcstr(glsl, ".gl_Position");
oss << "input.cp";
oss << TranslateOperandIndex(psOperand, 0);//Vertex index
oss << "." << psContext->GetDeclaredInputName(psOperand, piRebase, 1, pui32IgnoreSwizzle);
}
else
{
// Not sure if this codepath is active outside hull/domain
oss << psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle);
oss << TranslateOperandIndex(psOperand, 0);//Vertex index
@ -654,6 +659,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
int32_t index = -1;
std::vector<uint32_t> arrayIndices;
bool isArray = false;
bool isFBInput = false;
psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf);
ASSERT(psCBuf != NULL);
@ -665,14 +671,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
if(psCBuf)
{
//$Globals.
if(psCBuf->name[0] == '$')
{
cbName = "Globals";
}
else
{
cbName = psCBuf->name;
}
cbName = GetCBName(psCBuf->name);
cbName += ".";
// Drop the constant buffer name from subpass inputs
if (cbName.substr(0, 19) == "hlslcc_SubpassInput")
@ -702,13 +701,23 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
componentsNeeded = maxSwiz - minSwiz + 1;
}
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags);
// When we have a component mask that doesn't have .x set (this basically only happens when we manually open operands into components)
// We have to pull down the swizzle array to match the first bit that's actually set
uint32_t tmpSwizzle[4] = { 0 };
int firstBitSet = 0;
if (ui32CompMask == 0)
ui32CompMask = 0xf;
while ((ui32CompMask & (1 << firstBitSet)) == 0)
firstBitSet++;
std::copy(&psOperand->aui32Swizzle[firstBitSet], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]);
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags);
// Get a possible dynamic array index
std::ostringstream dynIndexOss;
std::string dynamicIndexStr;
bool needsIndexCalcRevert = false;
bool isAoS = ((!isArray && arrayIndices.size() > 0) || (isArray && arrayIndices.size() > 1));
bool isUnityInstancingBuffer = isAoS && IsUnityFlexibleInstancingBuffer(psCBuf);
Operand *psDynIndexOp = psOperand->GetDynamicIndexOperand(psContext, psVarType, isAoS, &needsIndexCalcRevert);
if (psDynIndexOp != NULL)
@ -719,16 +728,18 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
if (eType != SVT_INT && eType != SVT_UINT)
opFlags = TO_AUTO_BITCAST_TO_INT;
dynIndexOss << TranslateOperand(psDynIndexOp, opFlags);
dynamicIndexStr = TranslateOperand(psDynIndexOp, opFlags, 0x1); // Just take the first component for the index
}
std::string dynamicIndexStr = dynIndexOss.str();
if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || (componentsNeeded <= psVarType->Columns))
{
// Simple case: just access one component
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES);
// Special hack for MSAA subpass inputs: in Metal we can only read the "current" sample, so ignore the index
if (strncmp(fullName.c_str(), "hlslcc_fbinput", 14) == 0)
isFBInput = true;
if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS)))
{
// We'll need to add the prefix only to the last section of the name
@ -741,7 +752,7 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
fullName.insert(commaPos + 1, prefix);
}
oss << cbName << fullName;
oss << MakeCBVarName(cbName, fullName, isUnityInstancingBuffer);
}
else
{
@ -769,18 +780,15 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags);
std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES);
if (tmpVarType->Class == SVC_SCALAR)
{
oss << cbName << fullName;
}
else
oss << MakeCBVarName(cbName, fullName, isUnityInstancingBuffer);
if (tmpVarType->Class != SVC_SCALAR)
{
uint32_t swizzle;
tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3
swizzle = psOperand->aui32Swizzle[i] - tmpRebase;
oss << cbName << fullName << "." << ("xyzw"[swizzle]);
oss << "." << ("xyzw"[swizzle]);
}
}
oss << ")";
@ -799,7 +807,12 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
bool hasDynamicIndex = !dynamicIndexStr.empty() && (arrayIndices.size() <= 1);
bool hasImmediateIndex = (index != -1) && !(hasDynamicIndex && index == 0);
if (hasDynamicIndex || hasImmediateIndex)
// Ignore index altogether on fb inputs
if (isFBInput)
{
// Nothing to do here
}
else if (hasDynamicIndex || hasImmediateIndex)
{
std::ostringstream fullIndexOss;
if (hasDynamicIndex && hasImmediateIndex)
@ -901,34 +914,47 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID:
case OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID:
{
// Not supported on Metal
ASSERT(0);
oss << "phaseInstanceID"; // Not a real builtin, but passed as a function parameter.
*pui32IgnoreSwizzle = 1;
break;
}
case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
{
oss << "ImmCB_" << psContext->currentPhase
<< "_" << psOperand->ui32RegisterNumber
<< "_" << psOperand->m_Rebase;
if (psOperand->m_SubOperands[0].get())
{
//Indexes must be integral. Offset is already taken care of above.
oss << "[" << TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER) << "]";
}
if (psOperand->m_Size == 1)
*pui32IgnoreSwizzle = 1;
oss << "ImmCB_" << psContext->currentPhase;
oss << TranslateOperandIndex(psOperand, 0);
break;
}
case OPERAND_TYPE_INPUT_DOMAIN_POINT:
{
// Not supported on Metal
ASSERT(0);
oss << "mtl_TessCoord";
break;
}
case OPERAND_TYPE_INPUT_CONTROL_POINT:
{
// Not supported on Metal
ASSERT(0);
int ignoreRedirect = 1;
int regSpace = psOperand->GetRegisterSpace(psContext);
if ((regSpace == 0 && psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe) ||
(regSpace == 1 && psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe))
{
ignoreRedirect = 0;
}
if (ignoreRedirect)
{
oss << "input.cp";
oss << TranslateOperandIndex(psOperand, 0);//Vertex index
oss << "." << psContext->GetDeclaredInputName(psOperand, piRebase, ignoreRedirect, pui32IgnoreSwizzle);
}
else
{
oss << psContext->GetDeclaredInputName(psOperand, piRebase, ignoreRedirect, pui32IgnoreSwizzle);
oss << TranslateOperandIndex(psOperand, 0);//Vertex index
}
// Check for scalar
if ((psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0)
*pui32IgnoreSwizzle = 1;
break;
}
case OPERAND_TYPE_NULL:
@ -939,8 +965,8 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
}
case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
{
// Not supported on Metal
ASSERT(0);
oss << "controlPointID";
*pui32IgnoreSwizzle = 1;
break;
}
case OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
@ -1030,9 +1056,101 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui
}
case OPERAND_TYPE_INPUT_PATCH_CONSTANT:
{
// Not supported on Metal
ASSERT(0);
const ShaderInfo::InOutSignature* psIn;
psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn);
*piRebase = psIn->iRebase;
switch (psIn->eSystemValueType)
{
case NAME_POSITION:
oss << "mtl_Position";
break;
case NAME_RENDER_TARGET_ARRAY_INDEX:
oss << "mtl_Layer";
*pui32IgnoreSwizzle = 1;
break;
case NAME_CLIP_DISTANCE:
// this is temp variable, declaration and redirecting to actual output is handled in DeclareClipPlanes
char tmpName[128]; sprintf(tmpName, "phase%d_ClipDistance%d", psContext->currentPhase, psIn->ui32SemanticIndex);
oss << tmpName;
*pui32IgnoreSwizzle = 1;
break;
case NAME_VIEWPORT_ARRAY_INDEX:
oss << "mtl_ViewPortIndex";
*pui32IgnoreSwizzle = 1;
break;
case NAME_VERTEX_ID:
oss << "mtl_VertexID";
*pui32IgnoreSwizzle = 1;
break;
case NAME_INSTANCE_ID:
oss << "mtl_InstanceID";
*pui32IgnoreSwizzle = 1;
break;
case NAME_IS_FRONT_FACE:
oss << "(mtl_FrontFace ? 0xffffffffu : uint(0))";
*pui32IgnoreSwizzle = 1;
break;
case NAME_PRIMITIVE_ID:
// Not on Metal
ASSERT(0);
break;
case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR:
case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR:
case NAME_FINAL_LINE_DENSITY_TESSFACTOR:
if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber])
oss << "edgeTessellationFactor";
else
oss << "edgeTessellationFactor[0]";
*pui32IgnoreSwizzle = 1;
break;
case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR:
case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR:
case NAME_FINAL_LINE_DETAIL_TESSFACTOR:
oss << "edgeTessellationFactor[1]";
*pui32IgnoreSwizzle = 1;
break;
case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR:
case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR:
oss << "edgeTessellationFactor[2]";
*pui32IgnoreSwizzle = 1;
break;
case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR:
oss << "edgeTessellationFactor[3]";
*pui32IgnoreSwizzle = 1;
break;
case NAME_FINAL_TRI_INSIDE_TESSFACTOR:
case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR:
if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber])
oss << "insideTessellationFactor";
else
oss << "insideTessellationFactor[0]";
*pui32IgnoreSwizzle = 1;
break;
case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR:
oss << "insideTessellationFactor[1]";
*pui32IgnoreSwizzle = 1;
break;
default:
const std::string patchPrefix = "patch.";
if (psContext->psShader->eShaderType == DOMAIN_SHADER)
oss << psContext->inputPrefix << patchPrefix << psIn->semanticName << psIn->ui32SemanticIndex;
else
oss << patchPrefix << psIn->semanticName << psIn->ui32SemanticIndex;
// Disable swizzles if this is a scalar
if (psContext->psShader->eShaderType == HULL_SHADER)
{
if ((psContext->psShader->abScalarOutput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0)
*pui32IgnoreSwizzle = 1;
}
else
{
if ((psContext->psShader->abScalarInput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0)
*pui32IgnoreSwizzle = 1;
}
break;
}
break;
}
default: