Initial release (cfe8342494bbc2)
This commit is contained in:
parent
07a739239e
commit
eea476093c
48
README.md
48
README.md
@ -1,2 +1,50 @@
|
||||
# HLSLcc
|
||||
DirectX shader bytecode cross compiler
|
||||
|
||||
Originally based on https://github.com/James-Jones/HLSLCrossCompiler.
|
||||
|
||||
This library takes DirectX bytecode as input, and translates it into the following languages:
|
||||
- GLSL (OpenGL 3.2 and later)
|
||||
- GLSL ES (OpenGL ES 3.0 and later)
|
||||
- GLSL ES for Vulkan consumption
|
||||
- Metal Shading Language
|
||||
|
||||
This library is used to generate all shaders in Unity for OpenGL, OpenGL ES 3.0+, Metal and Vulkan.
|
||||
|
||||
Changes from original HLSLCrossCompiler:
|
||||
- Codebase changed to C++11, with major code reorganizations.
|
||||
- Support for multiple language output backends (currently ToGLSL and ToMetal)
|
||||
- Metal language output support
|
||||
- Temp register type analysis: In DX bytecode the registers are typeless 32-bit 4-vectors. We do code analysis to infer the actual data types (to prevent the need for tons of bitcasts).
|
||||
- Loop transformation: Detect constructs that look like for-loops and transform them back to their original form
|
||||
- Support for partial precision variables in HLSL (min16float etc). Do extra analysis pass to infer the intended precision of samplers.
|
||||
- Reflection interface to retrieve the shader inputs and their types.
|
||||
- Lots of workarounds for various driver/shader compiler bugs.
|
||||
- Lots of minor fixes and improvements for correctness
|
||||
- Lots of Unity-specific tweaks to allow extending HLSL without having to change the D3D compiler itself.
|
||||
|
||||
## Note
|
||||
|
||||
This project does not include build files or a test suite, as they are integrated into the Unity build systems. However, building this library should be fairly straightforward: just compile src/*.cpp (in C++11 mode!) and src/cbstring/*.c with the following include paths:
|
||||
|
||||
- include
|
||||
- src/internal_includes
|
||||
- src/cbstring
|
||||
- src
|
||||
|
||||
The main entry point is the TranslateHLSLFromMem() function in HLSLcc.cpp (taking DX bytecode as input).
|
||||
|
||||
|
||||
## Contributors
|
||||
- Mikko Strandborg
|
||||
- Juho Oravainen
|
||||
- David Rogers
|
||||
- Marton Ekler
|
||||
- Antti Tapaninen
|
||||
- Florian Penzkofer
|
||||
- Alexey Orlov
|
||||
- Povilas Kanapickas
|
||||
|
||||
## License
|
||||
|
||||
See license.txt.
|
||||
|
493
include/ShaderInfo.h
Normal file
493
include/ShaderInfo.h
Normal file
@ -0,0 +1,493 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include "growing_array.h"
|
||||
#include <stdint.h>
|
||||
//Reflection
|
||||
#define MAX_RESOURCE_BINDINGS 256
|
||||
|
||||
// Data / object type of a shader variable as exposed through reflection.
// Values up to 51 form one contiguous-ish block; the values above 150 (and
// 53/54) are extra markers used by the type analysis passes, as noted below.
typedef enum _SHADER_VARIABLE_TYPE {
    SVT_VOID                      = 0,
    SVT_BOOL                      = 1,
    SVT_INT                       = 2,
    SVT_FLOAT                     = 3,
    SVT_STRING                    = 4,
    SVT_TEXTURE                   = 5,
    SVT_TEXTURE1D                 = 6,
    SVT_TEXTURE2D                 = 7,
    SVT_TEXTURE3D                 = 8,
    SVT_TEXTURECUBE               = 9,
    SVT_SAMPLER                   = 10,
    SVT_PIXELSHADER               = 15,
    SVT_VERTEXSHADER              = 16,
    SVT_UINT                      = 19,
    SVT_UINT8                     = 20,
    SVT_GEOMETRYSHADER            = 21,
    SVT_RASTERIZER                = 22,
    SVT_DEPTHSTENCIL              = 23,
    SVT_BLEND                     = 24,
    SVT_BUFFER                    = 25,
    SVT_CBUFFER                   = 26,
    SVT_TBUFFER                   = 27,
    SVT_TEXTURE1DARRAY            = 28,
    SVT_TEXTURE2DARRAY            = 29,
    SVT_RENDERTARGETVIEW          = 30,
    SVT_DEPTHSTENCILVIEW          = 31,
    SVT_TEXTURE2DMS               = 32,
    SVT_TEXTURE2DMSARRAY          = 33,
    SVT_TEXTURECUBEARRAY          = 34,
    SVT_HULLSHADER                = 35,
    SVT_DOMAINSHADER              = 36,
    SVT_INTERFACE_POINTER         = 37,
    SVT_COMPUTESHADER             = 38,
    SVT_DOUBLE                    = 39,
    SVT_RWTEXTURE1D               = 40,
    SVT_RWTEXTURE1DARRAY          = 41,
    SVT_RWTEXTURE2D               = 42,
    SVT_RWTEXTURE2DARRAY          = 43,
    SVT_RWTEXTURE3D               = 44,
    SVT_RWBUFFER                  = 45,
    SVT_BYTEADDRESS_BUFFER        = 46,
    SVT_RWBYTEADDRESS_BUFFER      = 47,
    SVT_STRUCTURED_BUFFER         = 48,
    SVT_RWSTRUCTURED_BUFFER       = 49,
    SVT_APPEND_STRUCTURED_BUFFER  = 50,
    SVT_CONSUME_STRUCTURED_BUFFER = 51,

    // Only used as a marker when analyzing register types
    SVT_FORCED_INT                = 152,
    // Integer that can be either signed or unsigned. Only used as an
    // intermediate step when doing data type analysis
    SVT_INT_AMBIGUOUS             = 153,

    // Partial precision types. Used when doing type analysis
    SVT_FLOAT10                   = 53, // Seems to be used in constant buffers
    SVT_FLOAT16                   = 54,
    SVT_INT16                     = 156,
    SVT_INT12                     = 157,
    SVT_UINT16                    = 158,

    // Sentinel with the largest positive 32-bit value.
    SVT_FORCE_DWORD               = 0x7fffffff
} SHADER_VARIABLE_TYPE;
|
||||
|
||||
// Structural class of a shader variable (scalar, vector, matrix, struct, ...).
// The enumerators are consecutive, starting at zero.
typedef enum _SHADER_VARIABLE_CLASS {
    SVC_SCALAR            = 0,
    SVC_VECTOR            = 1,
    SVC_MATRIX_ROWS       = 2,
    SVC_MATRIX_COLUMNS    = 3,
    SVC_OBJECT            = 4,
    SVC_STRUCT            = 5,
    SVC_INTERFACE_CLASS   = 6,
    SVC_INTERFACE_POINTER = 7,
    // Sentinel with the largest positive 32-bit value.
    SVC_FORCE_DWORD       = 0x7fffffff
} SHADER_VARIABLE_CLASS;
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////
|
||||
// Types
|
||||
|
||||
// Tessellator partitioning modes.
enum TESSELLATOR_PARTITIONING
{
    TESSELLATOR_PARTITIONING_UNDEFINED       = 0,
    TESSELLATOR_PARTITIONING_INTEGER         = 1,
    TESSELLATOR_PARTITIONING_POW2            = 2,
    TESSELLATOR_PARTITIONING_FRACTIONAL_ODD  = 3,
    TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4
};
|
||||
|
||||
// Primitive kinds the tessellator can output.
enum TESSELLATOR_OUTPUT_PRIMITIVE
{
    TESSELLATOR_OUTPUT_UNDEFINED    = 0,
    TESSELLATOR_OUTPUT_POINT        = 1,
    TESSELLATOR_OUTPUT_LINE         = 2,
    TESSELLATOR_OUTPUT_TRIANGLE_CW  = 3,
    TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4
};
|
||||
|
||||
// System-value names that an input/output signature element can carry.
enum SPECIAL_NAME
{
    NAME_UNDEFINED                         = 0,
    NAME_POSITION                          = 1,
    NAME_CLIP_DISTANCE                     = 2,
    NAME_CULL_DISTANCE                     = 3,
    NAME_RENDER_TARGET_ARRAY_INDEX         = 4,
    NAME_VIEWPORT_ARRAY_INDEX              = 5,
    NAME_VERTEX_ID                         = 6,
    NAME_PRIMITIVE_ID                      = 7,
    NAME_INSTANCE_ID                       = 8,
    NAME_IS_FRONT_FACE                     = 9,
    NAME_SAMPLE_INDEX                      = 10,

    // The following are added for D3D11
    NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR = 11,
    NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR = 12,
    NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR = 13,
    NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR = 14,
    NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR    = 15,
    NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR    = 16,
    NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR  = 17,
    NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR  = 18,
    NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR  = 19,
    NAME_FINAL_TRI_INSIDE_TESSFACTOR       = 20,
    NAME_FINAL_LINE_DETAIL_TESSFACTOR      = 21,
    NAME_FINAL_LINE_DENSITY_TESSFACTOR     = 22,
};
|
||||
|
||||
|
||||
// Component data type of an input/output signature element.
enum INOUT_COMPONENT_TYPE
{
    INOUT_COMPONENT_UNKNOWN = 0,
    INOUT_COMPONENT_UINT32  = 1,
    INOUT_COMPONENT_SINT32  = 2,
    INOUT_COMPONENT_FLOAT32 = 3
};
|
||||
|
||||
// Minimum precision hint attached to an input/output signature element.
enum MIN_PRECISION
{
    MIN_PRECISION_DEFAULT   = 0,
    MIN_PRECISION_FLOAT_16  = 1,
    MIN_PRECISION_FLOAT_2_8 = 2,
    MIN_PRECISION_RESERVED  = 3,
    MIN_PRECISION_SINT_16   = 4,
    MIN_PRECISION_UINT_16   = 5,
    MIN_PRECISION_ANY_16    = 0xf0,
    MIN_PRECISION_ANY_10    = 0xf1
};
|
||||
|
||||
// Kind of a bound shader resource. Values are consecutive from zero;
// RTYPE_COUNT is the number of real entries.
enum ResourceType
{
    RTYPE_CBUFFER                       = 0,
    RTYPE_TBUFFER                       = 1,
    RTYPE_TEXTURE                       = 2,
    RTYPE_SAMPLER                       = 3,
    RTYPE_UAV_RWTYPED                   = 4,
    RTYPE_STRUCTURED                    = 5,
    RTYPE_UAV_RWSTRUCTURED              = 6,
    RTYPE_BYTEADDRESS                   = 7,
    RTYPE_UAV_RWBYTEADDRESS             = 8,
    RTYPE_UAV_APPEND_STRUCTURED         = 9,
    RTYPE_UAV_CONSUME_STRUCTURED        = 10,
    RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER = 11,
    RTYPE_COUNT                         = 12,
};
|
||||
|
||||
// Coarse grouping of resource types. RGROUP_COUNT is the number of groups
// and is used as an array dimension elsewhere in this header.
enum ResourceGroup
{
    RGROUP_CBUFFER = 0,
    RGROUP_TEXTURE = 1,
    RGROUP_SAMPLER = 2,
    RGROUP_UAV     = 3,
    RGROUP_COUNT   = 4,
};
|
||||
|
||||
// Dimensionality of a reflected resource (buffer, 1D/2D/3D/cube texture, ...).
enum REFLECT_RESOURCE_DIMENSION
{
    REFLECT_RESOURCE_DIMENSION_UNKNOWN          = 0,
    REFLECT_RESOURCE_DIMENSION_BUFFER           = 1,
    REFLECT_RESOURCE_DIMENSION_TEXTURE1D        = 2,
    REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY   = 3,
    REFLECT_RESOURCE_DIMENSION_TEXTURE2D        = 4,
    REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY   = 5,
    REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS      = 6,
    REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 7,
    REFLECT_RESOURCE_DIMENSION_TEXTURE3D        = 8,
    REFLECT_RESOURCE_DIMENSION_TEXTURECUBE      = 9,
    REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10,
    REFLECT_RESOURCE_DIMENSION_BUFFEREX         = 11,
};
|
||||
|
||||
// Precision qualifier associated with a reflected resource.
enum REFLECT_RESOURCE_PRECISION
{
    REFLECT_RESOURCE_PRECISION_UNKNOWN = 0,
    REFLECT_RESOURCE_PRECISION_LOWP    = 1,
    REFLECT_RESOURCE_PRECISION_MEDIUMP = 2,
    REFLECT_RESOURCE_PRECISION_HIGHP   = 3,
};
|
||||
|
||||
// Return (sample) type of a resource. Note that numbering starts at 1.
enum RESOURCE_RETURN_TYPE
{
    RETURN_TYPE_UNORM     = 1,
    RETURN_TYPE_SNORM     = 2,
    RETURN_TYPE_SINT      = 3,
    RETURN_TYPE_UINT      = 4,
    RETURN_TYPE_FLOAT     = 5,
    RETURN_TYPE_MIXED     = 6,
    RETURN_TYPE_DOUBLE    = 7,
    RETURN_TYPE_CONTINUED = 8,
    RETURN_TYPE_UNUSED    = 9,
};
|
||||
|
||||
typedef std::map<std::string, REFLECT_RESOURCE_PRECISION> HLSLccSamplerPrecisionInfo;
|
||||
|
||||
struct ResourceBinding
|
||||
{
|
||||
std::string name;
|
||||
ResourceType eType;
|
||||
uint32_t ui32BindPoint;
|
||||
uint32_t ui32BindCount;
|
||||
uint32_t ui32Flags;
|
||||
REFLECT_RESOURCE_DIMENSION eDimension;
|
||||
RESOURCE_RETURN_TYPE ui32ReturnType;
|
||||
uint32_t ui32NumSamples;
|
||||
REFLECT_RESOURCE_PRECISION ePrecision;
|
||||
|
||||
SHADER_VARIABLE_TYPE GetDataType() const
|
||||
{
|
||||
switch (ePrecision)
|
||||
{
|
||||
case REFLECT_RESOURCE_PRECISION_LOWP:
|
||||
switch (ui32ReturnType)
|
||||
{
|
||||
case RETURN_TYPE_UNORM:
|
||||
case RETURN_TYPE_SNORM:
|
||||
case RETURN_TYPE_FLOAT:
|
||||
return SVT_FLOAT10;
|
||||
case RETURN_TYPE_SINT:
|
||||
return SVT_INT16;
|
||||
case RETURN_TYPE_UINT:
|
||||
return SVT_UINT16;
|
||||
default:
|
||||
// ASSERT(0);
|
||||
return SVT_FLOAT10;
|
||||
}
|
||||
|
||||
case REFLECT_RESOURCE_PRECISION_MEDIUMP:
|
||||
switch (ui32ReturnType)
|
||||
{
|
||||
case RETURN_TYPE_UNORM:
|
||||
case RETURN_TYPE_SNORM:
|
||||
case RETURN_TYPE_FLOAT:
|
||||
return SVT_FLOAT16;
|
||||
case RETURN_TYPE_SINT:
|
||||
return SVT_INT16;
|
||||
case RETURN_TYPE_UINT:
|
||||
return SVT_UINT16;
|
||||
default:
|
||||
// ASSERT(0);
|
||||
return SVT_FLOAT16;
|
||||
}
|
||||
|
||||
default:
|
||||
switch (ui32ReturnType)
|
||||
{
|
||||
case RETURN_TYPE_UNORM:
|
||||
case RETURN_TYPE_SNORM:
|
||||
case RETURN_TYPE_FLOAT:
|
||||
return SVT_FLOAT;
|
||||
case RETURN_TYPE_SINT:
|
||||
return SVT_INT;
|
||||
case RETURN_TYPE_UINT:
|
||||
return SVT_UINT;
|
||||
case RETURN_TYPE_DOUBLE:
|
||||
return SVT_DOUBLE;
|
||||
default:
|
||||
// ASSERT(0);
|
||||
return SVT_FLOAT;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct ShaderVarType
|
||||
{
|
||||
ShaderVarType() :
|
||||
Class(),
|
||||
Type(),
|
||||
Rows(),
|
||||
Columns(),
|
||||
Elements(),
|
||||
MemberCount(),
|
||||
Offset(),
|
||||
ParentCount(),
|
||||
Parent(),
|
||||
m_IsUsed(false)
|
||||
{}
|
||||
|
||||
SHADER_VARIABLE_CLASS Class;
|
||||
SHADER_VARIABLE_TYPE Type;
|
||||
uint32_t Rows;
|
||||
uint32_t Columns;
|
||||
uint32_t Elements;
|
||||
uint32_t MemberCount;
|
||||
uint32_t Offset;
|
||||
std::string name;
|
||||
|
||||
uint32_t ParentCount;
|
||||
struct ShaderVarType * Parent;
|
||||
//Includes all parent names.
|
||||
std::string fullName;
|
||||
|
||||
std::vector<struct ShaderVarType> Members;
|
||||
|
||||
bool m_IsUsed; // If not set, is not used in the shader code
|
||||
|
||||
uint32_t GetMemberCount() const
|
||||
{
|
||||
if (Class == SVC_STRUCT)
|
||||
{
|
||||
uint32_t res = 0;
|
||||
std::vector<struct ShaderVarType>::const_iterator itr;
|
||||
for (itr = Members.begin(); itr != Members.end(); itr++)
|
||||
{
|
||||
res += itr->GetMemberCount();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
struct ShaderVar
|
||||
{
|
||||
std::string name;
|
||||
int haveDefaultValue;
|
||||
std::vector<uint32_t> pui32DefaultValues;
|
||||
//Offset/Size in bytes.
|
||||
uint32_t ui32StartOffset;
|
||||
uint32_t ui32Size;
|
||||
|
||||
ShaderVarType sType;
|
||||
};
|
||||
|
||||
struct ConstantBuffer
|
||||
{
|
||||
std::string name;
|
||||
|
||||
std::vector<ShaderVar> asVars;
|
||||
|
||||
uint32_t ui32TotalSizeInBytes;
|
||||
|
||||
uint32_t GetMemberCount(bool stripUnused) const
|
||||
{
|
||||
uint32_t res = 0;
|
||||
std::vector<ShaderVar>::const_iterator itr;
|
||||
for (itr = asVars.begin(); itr != asVars.end(); itr++)
|
||||
{
|
||||
if(stripUnused && !itr->sType.m_IsUsed)
|
||||
continue;
|
||||
res += itr->sType.GetMemberCount();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
// Reflection record for a class type.
struct ClassType
{
    std::string name;
    uint16_t    ui16ID;
    uint16_t    ui16ConstBufStride;
    uint16_t    ui16Texture;
    uint16_t    ui16Sampler;
};
|
||||
|
||||
// Reflection record for a class instance.
struct ClassInstance
{
    std::string name;
    uint16_t    ui16ID;
    uint16_t    ui16ConstBuf;
    uint16_t    ui16ConstBufOffset;
    uint16_t    ui16Texture;
    uint16_t    ui16Sampler;
};
|
||||
|
||||
class Operand;
|
||||
|
||||
class ShaderInfo
|
||||
{
|
||||
public:
|
||||
|
||||
struct InOutSignature
|
||||
{
|
||||
std::string semanticName;
|
||||
uint32_t ui32SemanticIndex;
|
||||
SPECIAL_NAME eSystemValueType;
|
||||
INOUT_COMPONENT_TYPE eComponentType;
|
||||
uint32_t ui32Register;
|
||||
uint32_t ui32Mask;
|
||||
uint32_t ui32ReadWriteMask;
|
||||
|
||||
int iRebase; // If mask does not start from zero, this indicates the offset that needs to be subtracted from each swizzle
|
||||
|
||||
uint32_t ui32Stream;
|
||||
MIN_PRECISION eMinPrec;
|
||||
|
||||
std::set<uint32_t> isIndexed; // Set of phases where this input/output is part of a index range.
|
||||
std::map<uint32_t, uint32_t> indexStart; // If indexed, contains the start index for the range
|
||||
std::map<uint32_t, uint32_t> index; // If indexed, contains the current index relative to the index start.
|
||||
|
||||
};
|
||||
|
||||
ShaderInfo() :
|
||||
ui32MajorVersion(),
|
||||
ui32MinorVersion(),
|
||||
psResourceBindings(),
|
||||
psConstantBuffers(),
|
||||
psThisPointerConstBuffer(),
|
||||
psClassTypes(),
|
||||
psClassInstances()
|
||||
{}
|
||||
|
||||
SHADER_VARIABLE_TYPE GetTextureDataType(uint32_t regNo);
|
||||
|
||||
int GetResourceFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ResourceBinding** ppsOutBinding) const;
|
||||
|
||||
void GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const;
|
||||
|
||||
int GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const;
|
||||
|
||||
int GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const;
|
||||
int GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const;
|
||||
int GetOutputSignatureFromRegister(const uint32_t ui32Register,
|
||||
const uint32_t ui32CompMask,
|
||||
const uint32_t ui32Stream,
|
||||
const InOutSignature** ppsOut,
|
||||
bool allowNull = false) const;
|
||||
|
||||
int GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const;
|
||||
|
||||
static ResourceGroup ResourceTypeToResourceGroup(ResourceType);
|
||||
|
||||
static int GetShaderVarFromOffset(const uint32_t ui32Vec4Offset,
|
||||
const uint32_t (&pui32Swizzle)[4],
|
||||
const ConstantBuffer* psCBuf,
|
||||
const ShaderVarType** ppsShaderVar,
|
||||
bool* isArray,
|
||||
std::vector<uint32_t>* arrayIndices,
|
||||
int32_t* pi32Rebase,
|
||||
uint32_t flags);
|
||||
|
||||
static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector<uint32_t> &indices);
|
||||
|
||||
// Apply shader precision information to resource bindings
|
||||
void AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info);
|
||||
|
||||
uint32_t ui32MajorVersion;
|
||||
uint32_t ui32MinorVersion;
|
||||
|
||||
std::vector<InOutSignature> psInputSignatures;
|
||||
std::vector<InOutSignature> psOutputSignatures;
|
||||
std::vector<InOutSignature> psPatchConstantSignatures;
|
||||
|
||||
std::vector<ResourceBinding> psResourceBindings;
|
||||
|
||||
std::vector<ConstantBuffer> psConstantBuffers;
|
||||
ConstantBuffer* psThisPointerConstBuffer;
|
||||
|
||||
std::vector<ClassType> psClassTypes;
|
||||
std::vector<ClassInstance> psClassInstances;
|
||||
|
||||
//Func table ID to class name ID.
|
||||
HLSLcc::growing_vector<uint32_t> aui32TableIDToTypeID;
|
||||
|
||||
HLSLcc::growing_vector<uint32_t> aui32ResourceMap[RGROUP_COUNT];
|
||||
|
||||
HLSLcc::growing_vector<ShaderVarType> sGroupSharedVarType;
|
||||
|
||||
TESSELLATOR_PARTITIONING eTessPartitioning;
|
||||
TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim;
|
||||
};
|
||||
|
47
include/growing_array.h
Normal file
47
include/growing_array.h
Normal file
@ -0,0 +1,47 @@
|
||||
#pragma once
|
||||
|
||||
namespace HLSLcc
{
    // A vector that automatically grows when written to, fills the intermediate ones with default value.
    // Reading from an index returns the default value if attempting to access out of bounds.
    //
    // NOTE: this header uses std::vector and std::size_t but does not include
    // <vector> itself; the including file must include it first.
    template <class T> class growing_vector
    {
    public:
        growing_vector() : data() {}

        std::vector<T> data;

        // Mutable access. Grows the storage to (idx + 1) * 2 elements when
        // idx is out of range, so the returned reference is always valid.
        T & operator[](std::size_t idx)
        {
            if (idx >= data.size())
                data.resize((idx + 1) * 2);
            return data[idx];
        }

        // Read-only access. Never grows the storage; an out-of-range index
        // yields a reference to a shared default-constructed value.
        const T & operator[](std::size_t idx) const
        {
            static const T defaultValue = T();
            if (idx >= data.size())
                return defaultValue;
            return data[idx];
        }
    };

    // Same but with bool specialization
    // (std::vector<bool> hands out proxy references, hence the different
    // return types).
    template <> class growing_vector<bool>
    {
    public:
        growing_vector() : data() {}

        std::vector<bool> data;

        // Mutable access. Grows the storage to (idx + 1) * 2 elements,
        // filled with false, when idx is out of range.
        std::vector<bool>::reference operator[](std::size_t idx)
        {
            if (idx >= data.size())
                data.resize((idx + 1) * 2, false);
            return data[idx];
        }

        // Read-only access, mirroring the primary template: never grows the
        // storage and returns false for an out-of-range index.
        bool operator[](std::size_t idx) const
        {
            if (idx >= data.size())
                return false;
            return data[idx];
        }
    };
}
|
454
include/hlslcc.h
Normal file
454
include/hlslcc.h
Normal file
@ -0,0 +1,454 @@
|
||||
#ifndef HLSLCC_H_
|
||||
#define HLSLCC_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
#if defined (_WIN32) && defined(HLSLCC_DYNLIB)
|
||||
#define HLSLCC_APIENTRY __stdcall
|
||||
#if defined(libHLSLcc_EXPORTS)
|
||||
#define HLSLCC_API __declspec(dllexport)
|
||||
#else
|
||||
#define HLSLCC_API __declspec(dllimport)
|
||||
#endif
|
||||
#else
|
||||
#define HLSLCC_APIENTRY
|
||||
#define HLSLCC_API
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
#include <stdio.h>
#include <string.h>
|
||||
|
||||
// Target shading language version. The *_FIRST / *_LAST aliases bracket the
// GLSL ES and desktop GLSL ranges respectively.
typedef enum
{
    LANG_DEFAULT, // Depends on the HLSL shader model.

    LANG_ES_100,
    LANG_ES_FIRST = LANG_ES_100,
    LANG_ES_300,
    LANG_ES_310,
    LANG_ES_LAST = LANG_ES_310,

    LANG_120,
    LANG_GL_FIRST = LANG_120,
    LANG_130,
    LANG_140,
    LANG_150,
    LANG_330,
    LANG_400,
    LANG_410,
    LANG_420,
    LANG_430,
    LANG_440,
    LANG_GL_LAST = LANG_440,

    LANG_METAL,
} GLLang;
|
||||
|
||||
// One-bit flags, one per GL extension named by the field.
typedef struct GlExtensions
{
    uint32_t ARB_explicit_attrib_location  : 1;
    uint32_t ARB_explicit_uniform_location : 1;
    uint32_t ARB_shading_language_420pack  : 1;
} GlExtensions;
|
||||
|
||||
#include "ShaderInfo.h"
|
||||
|
||||
typedef std::vector<std::string> TextureSamplerPairs;
|
||||
|
||||
// Interpolation mode of a pixel shader input.
typedef enum INTERPOLATION_MODE
{
    INTERPOLATION_UNDEFINED                     = 0,
    INTERPOLATION_CONSTANT                      = 1,
    INTERPOLATION_LINEAR                        = 2,
    INTERPOLATION_LINEAR_CENTROID               = 3,
    INTERPOLATION_LINEAR_NOPERSPECTIVE          = 4,
    INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID = 5,
    INTERPOLATION_LINEAR_SAMPLE                 = 6,
    INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE   = 7,
} INTERPOLATION_MODE;
|
||||
|
||||
// Program stage bits (one bit per shader stage).
#define PS_FLAG_VERTEX_SHADER   0x1
#define PS_FLAG_HULL_SHADER     0x2
#define PS_FLAG_DOMAIN_SHADER   0x4
#define PS_FLAG_GEOMETRY_SHADER 0x8
#define PS_FLAG_PIXEL_SHADER    0x10

// Operand translation flags (bit mask).
#define TO_FLAG_NONE             0x0
#define TO_FLAG_INTEGER          0x1
#define TO_FLAG_NAME_ONLY        0x2
#define TO_FLAG_DECLARATION_NAME 0x4
#define TO_FLAG_DESTINATION      0x8 //Operand is being written to by assignment.
#define TO_FLAG_UNSIGNED_INTEGER 0x10
#define TO_FLAG_DOUBLE           0x20

// --- TO_AUTO_BITCAST_TO_FLOAT ---
// If the operand is an integer temp variable then this flag
// indicates that the temp has a valid floating point encoding
// and that the current expression expects the operand to be floating point
// and therefore intBitsToFloat must be applied to that variable.
#define TO_AUTO_BITCAST_TO_FLOAT 0x40
#define TO_AUTO_BITCAST_TO_INT   0x80
#define TO_AUTO_BITCAST_TO_UINT  0x100

// AUTO_EXPAND flags automatically expand the operand to at least (i/u)vecX
// to match HLSL functionality.
#define TO_AUTO_EXPAND_TO_VEC2 0x200
#define TO_AUTO_EXPAND_TO_VEC3 0x400
#define TO_AUTO_EXPAND_TO_VEC4 0x800

#define TO_FLAG_BOOL 0x1000

// These flags are only used for Metal:
// Force downscaling of the operand to match
// the other operand (Metal doesn't like mixing halfs with floats)
#define TO_FLAG_FORCE_HALF 0x2000
|
||||
|
||||
// Shader stage identifier. INVALID_SHADER is -1; the real stages count up
// from zero.
typedef enum
{
    INVALID_SHADER  = -1,
    PIXEL_SHADER    = 0,
    VERTEX_SHADER   = 1,
    GEOMETRY_SHADER = 2,
    HULL_SHADER     = 3,
    DOMAIN_SHADER   = 4,
    COMPUTE_SHADER  = 5,
} SHADER_TYPE;
|
||||
|
||||
// Enum for texture dimension reflection data
|
||||
typedef enum
{
    TD_FLOAT     = 0,
    TD_INT       = 1,
    TD_2D        = 2,
    TD_3D        = 3,
    TD_CUBE      = 4,
    TD_2DSHADOW  = 5,
    TD_2DARRAY   = 6,
    TD_CUBEARRAY = 7
} HLSLCC_TEX_DIMENSION;
|
||||
|
||||
// The prefix for all temporary variables used by the generated code.
|
||||
// Using a texture or uniform name like this will cause conflicts
|
||||
#define HLSLCC_TEMP_PREFIX "u_xlat"
|
||||
|
||||
//The shader stages (Vertex, Pixel et al) do not depend on each other
|
||||
//in HLSL. GLSL is a different story. HLSLCrossCompiler requires
|
||||
//that hull shaders must be compiled before domain shaders, and
|
||||
//the pixel shader must be compiled before all of the others.
|
||||
//During compilation the GLSLCrossDependencyData struct will
|
||||
//carry over any information needed about a different shader stage
|
||||
//in order to construct valid GLSL shader combinations.
|
||||
|
||||
//Using GLSLCrossDependencyData is optional. However some shader
|
||||
//combinations may show link failures, or runtime errors.
|
||||
class GLSLCrossDependencyData
|
||||
{
|
||||
public:
|
||||
// A container for a single Vulkan resource binding (<set, binding> pair)
|
||||
typedef std::pair<uint32_t, uint32_t> VulkanResourceBinding;
|
||||
|
||||
private:
|
||||
//Required if PixelInterpDependency is true
|
||||
std::vector<INTERPOLATION_MODE> pixelInterpolation;
|
||||
|
||||
// Map of varying locations, indexed by varying names.
|
||||
typedef std::map<std::string, uint32_t> VaryingLocations;
|
||||
|
||||
static const int MAX_NAMESPACES = 6; // Max namespaces: vert input, hull input, domain input, geom input, ps input, (ps output)
|
||||
|
||||
VaryingLocations varyingLocationsMap[MAX_NAMESPACES];
|
||||
uint32_t nextAvailableVaryingLocation[MAX_NAMESPACES];
|
||||
|
||||
typedef std::map<std::string, VulkanResourceBinding> VulkanResourceBindings;
|
||||
VulkanResourceBindings m_VulkanResourceBindings;
|
||||
uint32_t m_NextAvailableVulkanResourceBinding[8]; // one per set.
|
||||
|
||||
inline int GetVaryingNamespace(SHADER_TYPE eShaderType, bool isInput)
|
||||
{
|
||||
switch (eShaderType)
|
||||
{
|
||||
case VERTEX_SHADER:
|
||||
return isInput ? 0 : 1;
|
||||
|
||||
case HULL_SHADER:
|
||||
return isInput ? 1 : 2;
|
||||
|
||||
case DOMAIN_SHADER:
|
||||
return isInput ? 2 : 3;
|
||||
|
||||
case GEOMETRY_SHADER:
|
||||
// The input depends on whether there's a tessellation shader before us
|
||||
if (isInput)
|
||||
{
|
||||
return ui32ProgramStages & PS_FLAG_DOMAIN_SHADER ? 3 : 1;
|
||||
}
|
||||
return 4;
|
||||
|
||||
case PIXEL_SHADER:
|
||||
// The inputs can come from geom shader, domain shader or directly from vertex shader
|
||||
if (isInput)
|
||||
{
|
||||
if (ui32ProgramStages & PS_FLAG_GEOMETRY_SHADER)
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
else if (ui32ProgramStages & PS_FLAG_DOMAIN_SHADER)
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 5; // This value never really used
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
public:
|
||||
GLSLCrossDependencyData()
|
||||
: eTessPartitioning(),
|
||||
eTessOutPrim(),
|
||||
ui32ProgramStages(0)
|
||||
{
|
||||
memset(nextAvailableVaryingLocation, 0, sizeof(nextAvailableVaryingLocation));
|
||||
memset(m_NextAvailableVulkanResourceBinding, 0, sizeof(m_NextAvailableVulkanResourceBinding));
|
||||
}
|
||||
|
||||
|
||||
// Retrieve the location for a varying with a given name.
|
||||
// If the name doesn't already have an allocated location, allocate one
|
||||
// and store it into the map.
|
||||
inline uint32_t GetVaryingLocation(const std::string &name, SHADER_TYPE eShaderType, bool isInput)
|
||||
{
|
||||
int nspace = GetVaryingNamespace(eShaderType, isInput);
|
||||
VaryingLocations::iterator itr = varyingLocationsMap[nspace].find(name);
|
||||
if (itr != varyingLocationsMap[nspace].end())
|
||||
return itr->second;
|
||||
|
||||
uint32_t newKey = nextAvailableVaryingLocation[nspace];
|
||||
nextAvailableVaryingLocation[nspace]++;
|
||||
varyingLocationsMap[nspace].insert(std::make_pair(name, newKey));
|
||||
return newKey;
|
||||
}
|
||||
|
||||
// Retrieve the binding for a resource (texture, constant buffer, image) with a given name
|
||||
// If not found, allocate a new one (in set 0) and return that
|
||||
// The returned value is a pair of <set, binding>
|
||||
// If the name contains "hlslcc_set_X_bind_Y", those values (from the first found occurence in the name)
|
||||
// will be used instead, and all occurences of that string will be removed from name, so name parameter can be modified
|
||||
// if allocRoomForCounter is true, the following binding number in the same set will be allocated with name + '_counter'
|
||||
inline std::pair<uint32_t, uint32_t> GetVulkanResourceBinding(std::string &name, bool allocRoomForCounter = false, uint32_t preferredSet = 0)
|
||||
{
|
||||
// scan for the special marker
|
||||
const char *marker = "Xhlslcc_set_%d_bind_%dX";
|
||||
uint32_t Set = 0, Binding = 0;
|
||||
size_t startLoc = name.find("Xhlslcc");
|
||||
if ((startLoc != std::string::npos) && (sscanf(name.c_str() + startLoc, marker, &Set, &Binding) == 2))
|
||||
{
|
||||
// Get rid of all markers
|
||||
while ((startLoc = name.find("Xhlslcc")) != std::string::npos)
|
||||
{
|
||||
size_t endLoc = name.find('X', startLoc + 1);
|
||||
if (endLoc == std::string::npos)
|
||||
break;
|
||||
name.erase(startLoc, endLoc - startLoc + 1);
|
||||
}
|
||||
// Add to map
|
||||
VulkanResourceBinding newBind = std::make_pair(Set, Binding);
|
||||
m_VulkanResourceBindings.insert(std::make_pair(name, newBind));
|
||||
if (allocRoomForCounter)
|
||||
{
|
||||
VulkanResourceBinding counterBind = std::make_pair(Set, Binding+1);
|
||||
m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind));
|
||||
}
|
||||
|
||||
return newBind;
|
||||
}
|
||||
|
||||
VulkanResourceBindings::iterator itr = m_VulkanResourceBindings.find(name);
|
||||
if (itr != m_VulkanResourceBindings.end())
|
||||
return itr->second;
|
||||
|
||||
// Allocate a new one
|
||||
VulkanResourceBinding newBind = std::make_pair(preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet]);
|
||||
m_NextAvailableVulkanResourceBinding[preferredSet]++;
|
||||
m_VulkanResourceBindings.insert(std::make_pair(name, newBind));
|
||||
if (allocRoomForCounter)
|
||||
{
|
||||
VulkanResourceBinding counterBind = std::make_pair(preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet]);
|
||||
m_NextAvailableVulkanResourceBinding[preferredSet]++;
|
||||
m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind));
|
||||
}
|
||||
return newBind;
|
||||
}
|
||||
|
||||
//dcl_tessellator_partitioning and dcl_tessellator_output_primitive appear in hull shader for D3D,
|
||||
//but they appear on inputs inside domain shaders for GL.
|
||||
//Hull shader must be compiled before domain so the
|
||||
//ensure correct partitioning and primitive type information
|
||||
//can be saved when compiling hull and passed to domain compilation.
|
||||
TESSELLATOR_PARTITIONING eTessPartitioning;
|
||||
TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim;
|
||||
|
||||
// Bitfield for the shader stages this program is going to include (see PS_FLAG_*).
|
||||
// Needed so we can construct proper shader input and output names
|
||||
uint32_t ui32ProgramStages;
|
||||
|
||||
inline INTERPOLATION_MODE GetInterpolationMode(uint32_t regNo)
|
||||
{
|
||||
if (regNo >= pixelInterpolation.size())
|
||||
return INTERPOLATION_UNDEFINED;
|
||||
else
|
||||
return pixelInterpolation[regNo];
|
||||
}
|
||||
|
||||
inline void SetInterpolationMode(uint32_t regNo, INTERPOLATION_MODE mode)
|
||||
{
|
||||
if (regNo >= pixelInterpolation.size())
|
||||
pixelInterpolation.resize((regNo + 1) * 2, INTERPOLATION_UNDEFINED);
|
||||
|
||||
pixelInterpolation[regNo] = mode;
|
||||
}
|
||||
|
||||
inline void ClearCrossDependencyData()
|
||||
{
|
||||
pixelInterpolation.clear();
|
||||
for (int i = 0; i < MAX_NAMESPACES; i++)
|
||||
{
|
||||
varyingLocationsMap[i].clear();
|
||||
nextAvailableVaryingLocation[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct GLSLShader
|
||||
{
|
||||
int shaderType; //One of the GL enums.
|
||||
std::string sourceCode;
|
||||
ShaderInfo reflection;
|
||||
GLLang GLSLLanguage;
|
||||
TextureSamplerPairs textureSamplers; // HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS fills this out
|
||||
};
|
||||
|
||||
// Interface for retrieving reflection and diagnostics data
|
||||
class HLSLccReflection
|
||||
{
|
||||
public:
|
||||
HLSLccReflection() {}
|
||||
virtual ~HLSLccReflection() {}
|
||||
|
||||
// Called on errors or diagnostic messages
|
||||
virtual void OnDiagnostics(const std::string &error, int line, bool isError) {}
|
||||
|
||||
virtual void OnInputBinding(const std::string &name, int bindIndex) {}
|
||||
|
||||
virtual bool OnConstantBuffer(const std::string &name, size_t bufferSize, size_t memberCount) { return true; }
|
||||
|
||||
virtual bool OnConstant(const std::string &name, int bindIndex, SHADER_VARIABLE_TYPE cType, int rows, int cols, bool isMatrix, int arraySize) { return true; }
|
||||
|
||||
virtual void OnConstantBufferBinding(const std::string &name, int bindIndex) {}
|
||||
|
||||
virtual void OnTextureBinding(const std::string &name, int bindIndex, HLSLCC_TEX_DIMENSION dim, bool isUAV) {}
|
||||
|
||||
virtual void OnBufferBinding(const std::string &name, int bindIndex, bool isUAV) {}
|
||||
|
||||
virtual void OnThreadGroupSize(unsigned int xSize, unsigned int ySize, unsigned int zSize) {}
|
||||
};
|
||||
|
||||
|
||||
/*HLSL constant buffers are treated as default-block unform arrays by default. This is done
|
||||
to support versions of GLSL which lack ARB_uniform_buffer_object functionality.
|
||||
Setting this flag causes each one to have its own uniform block.
|
||||
Note: Currently the nth const buffer will be named UnformBufferN. This is likey to change to the original HLSL name in the future.*/
|
||||
static const unsigned int HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT = 0x1;
|
||||
|
||||
// NOTE(review): undocumented here; the name suggests an upper-left fragment-coordinate origin - confirm against the translator.
static const unsigned int HLSLCC_FLAG_ORIGIN_UPPER_LEFT = 0x2;
|
||||
|
||||
// NOTE(review): undocumented here; the name suggests integer pixel centers for fragment coordinates - confirm against the translator.
static const unsigned int HLSLCC_FLAG_PIXEL_CENTER_INTEGER = 0x4;
|
||||
|
||||
// NOTE(review): undocumented here; the name suggests global constants are kept out of uniform buffer objects - confirm against the translator.
static const unsigned int HLSLCC_FLAG_GLOBAL_CONSTS_NEVER_IN_UBO = 0x8;
|
||||
|
||||
//GS enabled?
|
||||
//Affects vertex shader (i.e. need to compile vertex shader again to use with/without GS).
|
||||
//This flag is needed in order for the interfaces between stages to match when GS is in use.
|
||||
//PS inputs VtxGeoOutput
|
||||
//GS outputs VtxGeoOutput
|
||||
//Vs outputs VtxOutput if GS enabled. VtxGeoOutput otherwise.
|
||||
static const unsigned int HLSLCC_FLAG_GS_ENABLED = 0x10;
|
||||
|
||||
static const unsigned int HLSLCC_FLAG_TESS_ENABLED = 0x20;
|
||||
|
||||
//Either use this flag or glBindFragDataLocationIndexed.
|
||||
//When set the first pixel shader output is the first input to blend
|
||||
//equation, the others go to the second input.
|
||||
static const unsigned int HLSLCC_FLAG_DUAL_SOURCE_BLENDING = 0x40;
|
||||
|
||||
//If set, shader inputs and outputs are declared with their semantic name.
|
||||
static const unsigned int HLSLCC_FLAG_INOUT_SEMANTIC_NAMES = 0x80;
|
||||
//If set, shader inputs and outputs are declared with their semantic name appended.
|
||||
static const unsigned int HLSLCC_FLAG_INOUT_APPEND_SEMANTIC_NAMES = 0x100;
|
||||
|
||||
//If set, combines texture/sampler pairs used together into samplers named "texturename_X_samplername".
|
||||
static const unsigned int HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS = 0x200;
|
||||
|
||||
//If set, attribute and uniform explicit location qualifiers are disabled (even if the language version supports that)
|
||||
static const unsigned int HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS = 0x400;
|
||||
|
||||
//If set, global uniforms are not stored in a struct.
|
||||
static const unsigned int HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT = 0x800;
|
||||
|
||||
//If set, image declarations will always have binding and format qualifiers.
|
||||
static const unsigned int HLSLCC_FLAG_GLES31_IMAGE_QUALIFIERS = 0x1000;
|
||||
|
||||
// If set, treats sampler names ending with _highp, _mediump, and _lowp as sampler precision qualifiers
|
||||
// Also removes that suffix from generated output
|
||||
static const unsigned int HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME = 0x2000;
|
||||
|
||||
// If set, adds location qualifiers to intra-shader varyings.
|
||||
static const unsigned int HLSLCC_FLAG_SEPARABLE_SHADER_OBJECTS = 0x4000;
|
||||
|
||||
// If set, wraps all uniform buffer declarations in a preprocessor macro #ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS
|
||||
// so that if that macro is defined, all UBO declarations will become normal uniforms
|
||||
static const unsigned int HLSLCC_FLAG_WRAP_UBO = 0x8000;
|
||||
|
||||
// If set, skips all members of the $Globals constant buffer struct that are not referenced in the shader code
|
||||
static const unsigned int HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS = 0x10000;
|
||||
|
||||
#define HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "hlslcc_mtx%dx%d"
|
||||
|
||||
// If set, translates all matrix declarations into vec4 arrays (as the DX bytecode treats them), and prefixes the name with 'hlslcc_mtx<rows>x<cols>'
|
||||
static const unsigned int HLSLCC_FLAG_TRANSLATE_MATRICES = 0x20000;
|
||||
|
||||
// If set, emits Vulkan-style (set, binding) bindings, also captures that info from any declaration named "<Name>_hlslcc_set_X_bind_Y"
|
||||
// Unless bindings are given explicitly, they are allocated into set 0 (map stored in GLSLCrossDependencyData)
|
||||
static const unsigned int HLSLCC_FLAG_VULKAN_BINDINGS = 0x40000;
|
||||
|
||||
// If set, metal output will use linear sampler for shadow compares, otherwise point sampler.
|
||||
static const unsigned int HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR = 0x80000;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Translates the shader stored in the file `filename` and writes the output into `*result`.
// Takes the same trailing arguments as TranslateHLSLFromMem; only the input source differs.
// The sampler-precision and reflection arguments are C++ references, so although the
// declaration sits in an extern "C" block it can only be called from C++.
// NOTE(review): `flags` is presumably a bitwise OR of the HLSLCC_FLAG_* constants above,
// and the meaning of the int return value is not visible in this header - confirm both.
HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename,
|
||||
unsigned int flags,
|
||||
GLLang language,
|
||||
const GlExtensions *extensions,
|
||||
GLSLCrossDependencyData* dependencies,
|
||||
HLSLccSamplerPrecisionInfo& samplerPrecisions,
|
||||
HLSLccReflection& reflectionCallbacks,
|
||||
GLSLShader* result
|
||||
);
|
||||
|
||||
// Translates the shader pointed to by `shader` and writes the output into `*result`.
// Takes the same trailing arguments as TranslateHLSLFromFile; only the input source differs.
// The sampler-precision and reflection arguments are C++ references, so although the
// declaration sits in an extern "C" block it can only be called from C++.
// NOTE(review): `shader` is presumably a pointer to DirectX shader bytecode held in memory
// (no length parameter is passed), `flags` presumably a bitwise OR of HLSLCC_FLAG_* values,
// and the meaning of the int return value is not visible in this header - confirm all three.
HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader,
|
||||
unsigned int flags,
|
||||
GLLang language,
|
||||
const GlExtensions *extensions,
|
||||
GLSLCrossDependencyData* dependencies,
|
||||
HLSLccSamplerPrecisionInfo& samplerPrecisions,
|
||||
HLSLccReflection& reflectionCallbacks,
|
||||
GLSLShader* result);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
5
include/hlslcc.hpp
Normal file
5
include/hlslcc.hpp
Normal file
@ -0,0 +1,5 @@
|
||||
|
||||
// C++ wrapper header: pulls in hlslcc.h inside an extern "C" linkage block.
// NOTE(review): hlslcc.h appears to wrap its Translate* declarations in its own
// `#ifdef __cplusplus` / extern "C" block already, and it declares C++-only types,
// so this outer block looks redundant - confirm before removing it.
extern "C" {
|
||||
#include "hlslcc.h"
|
||||
}
|
||||
|
800
include/pstdint.h
Normal file
800
include/pstdint.h
Normal file
@ -0,0 +1,800 @@
|
||||
/* A portable stdint.h
|
||||
****************************************************************************
|
||||
* BSD License:
|
||||
****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2005-2011 Paul Hsieh
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
****************************************************************************
|
||||
*
|
||||
* Version 0.1.12
|
||||
*
|
||||
* The ANSI C standard committee, for the C99 standard, specified the
|
||||
* inclusion of a new standard include file called stdint.h. This is
|
||||
* a very useful and long desired include file which contains several
|
||||
* very precise definitions for integer scalar types that is
|
||||
* critically important for making portable several classes of
|
||||
* applications including cryptography, hashing, variable length
|
||||
* integer libraries and so on. But for most developers its likely
|
||||
* useful just for programming sanity.
|
||||
*
|
||||
* The problem is that most compiler vendors have decided not to
|
||||
* implement the C99 standard, and the next C++ language standard
|
||||
* (which has a lot more mindshare these days) will be a long time in
|
||||
* coming and its unknown whether or not it will include stdint.h or
|
||||
* how much adoption it will have. Either way, it will be a long time
|
||||
* before all compilers come with a stdint.h and it also does nothing
|
||||
* for the extremely large number of compilers available today which
|
||||
* do not include this file, or anything comparable to it.
|
||||
*
|
||||
* So that's what this file is all about. Its an attempt to build a
|
||||
* single universal include file that works on as many platforms as
|
||||
* possible to deliver what stdint.h is supposed to. A few things
|
||||
* that should be noted about this file:
|
||||
*
|
||||
* 1) It is not guaranteed to be portable and/or present an identical
|
||||
* interface on all platforms. The extreme variability of the
|
||||
* ANSI C standard makes this an impossibility right from the
|
||||
* very get go. Its really only meant to be useful for the vast
|
||||
* majority of platforms that possess the capability of
|
||||
* implementing usefully and precisely defined, standard sized
|
||||
* integer scalars. Systems which are not intrinsically 2s
|
||||
* complement may produce invalid constants.
|
||||
*
|
||||
* 2) There is an unavoidable use of non-reserved symbols.
|
||||
*
|
||||
* 3) Other standard include files are invoked.
|
||||
*
|
||||
* 4) This file may come in conflict with future platforms that do
|
||||
* include stdint.h. The hope is that one or the other can be
|
||||
* used with no real difference.
|
||||
*
|
||||
* 5) In the current version, if your platform can't represent
|
||||
* int32_t, int16_t and int8_t, it just dumps out with a compiler
|
||||
* error.
|
||||
*
|
||||
* 6) 64 bit integers may or may not be defined. Test for their
|
||||
* presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX.
|
||||
* Note that this is different from the C99 specification which
|
||||
* requires the existence of 64 bit support in the compiler. If
|
||||
* this is not defined for your platform, yet it is capable of
|
||||
* dealing with 64 bits then it is because this file has not yet
|
||||
* been extended to cover all of your system's capabilities.
|
||||
*
|
||||
* 7) (u)intptr_t may or may not be defined. Test for its presence
|
||||
* with the test: #ifdef PTRDIFF_MAX. If this is not defined
|
||||
* for your platform, then it is because this file has not yet
|
||||
* been extended to cover all of your system's capabilities, not
|
||||
* because its optional.
|
||||
*
|
||||
* 8) The following might not be defined even if your platform is
|
||||
* capable of defining it:
|
||||
*
|
||||
* WCHAR_MIN
|
||||
* WCHAR_MAX
|
||||
* (u)int64_t
|
||||
* PTRDIFF_MIN
|
||||
* PTRDIFF_MAX
|
||||
* (u)intptr_t
|
||||
*
|
||||
* 9) The following have not been defined:
|
||||
*
|
||||
* WINT_MIN
|
||||
* WINT_MAX
|
||||
*
|
||||
* 10) The criteria for defining (u)int_least(*)_t isn't clear,
|
||||
* except for systems which don't have a type that precisely
|
||||
* defined 8, 16, or 32 bit types (which this include file does
|
||||
* not support anyways). Default definitions have been given.
|
||||
*
|
||||
* 11) The criteria for defining (u)int_fast(*)_t isn't something I
|
||||
* would trust to any particular compiler vendor or the ANSI C
|
||||
* committee. It is well known that "compatible systems" are
|
||||
* commonly created that have very different performance
|
||||
* characteristics from the systems they are compatible with,
|
||||
* especially those whose vendors make both the compiler and the
|
||||
* system. Default definitions have been given, but its strongly
|
||||
* recommended that users never use these definitions for any
|
||||
* reason (they do *NOT* deliver any serious guarantee of
|
||||
* improved performance -- not in this file, nor any vendor's
|
||||
* stdint.h).
|
||||
*
|
||||
* 12) The following macros:
|
||||
*
|
||||
* PRINTF_INTMAX_MODIFIER
|
||||
* PRINTF_INT64_MODIFIER
|
||||
* PRINTF_INT32_MODIFIER
|
||||
* PRINTF_INT16_MODIFIER
|
||||
* PRINTF_LEAST64_MODIFIER
|
||||
* PRINTF_LEAST32_MODIFIER
|
||||
* PRINTF_LEAST16_MODIFIER
|
||||
* PRINTF_INTPTR_MODIFIER
|
||||
*
|
||||
* are strings which have been defined as the modifiers required
|
||||
* for the "d", "u" and "x" printf formats to correctly output
|
||||
* (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t,
|
||||
* (u)least32_t, (u)least16_t and (u)intptr_t types respectively.
|
||||
* PRINTF_INTPTR_MODIFIER is not defined for some systems which
|
||||
* provide their own stdint.h. PRINTF_INT64_MODIFIER is not
|
||||
* defined if INT64_MAX is not defined. These are an extension
|
||||
* beyond what C99 specifies must be in stdint.h.
|
||||
*
|
||||
* In addition, the following macros are defined:
|
||||
*
|
||||
* PRINTF_INTMAX_HEX_WIDTH
|
||||
* PRINTF_INT64_HEX_WIDTH
|
||||
* PRINTF_INT32_HEX_WIDTH
|
||||
* PRINTF_INT16_HEX_WIDTH
|
||||
* PRINTF_INT8_HEX_WIDTH
|
||||
* PRINTF_INTMAX_DEC_WIDTH
|
||||
* PRINTF_INT64_DEC_WIDTH
|
||||
* PRINTF_INT32_DEC_WIDTH
|
||||
* PRINTF_INT16_DEC_WIDTH
|
||||
* PRINTF_INT8_DEC_WIDTH
|
||||
*
|
||||
* Which specifies the maximum number of characters required to
|
||||
* print the number of that type in either hexadecimal or decimal.
|
||||
* These are an extension beyond what C99 specifies must be in
|
||||
* stdint.h.
|
||||
*
|
||||
* Compilers tested (all with 0 warnings at their highest respective
|
||||
* settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32
|
||||
* bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio
|
||||
* .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3
|
||||
*
|
||||
* This file should be considered a work in progress. Suggestions for
|
||||
* improvements, especially those which increase coverage are strongly
|
||||
* encouraged.
|
||||
*
|
||||
* Acknowledgements
|
||||
*
|
||||
* The following people have made significant contributions to the
|
||||
* development and testing of this file:
|
||||
*
|
||||
* Chris Howie
|
||||
* John Steele Scott
|
||||
* Dave Thorup
|
||||
* John Dill
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <limits.h>
|
||||
#include <signal.h>
|
||||
|
||||
/*
|
||||
* For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and
|
||||
* do nothing else. On the Mac OS X version of gcc this is _STDINT_H_.
|
||||
*/
|
||||
|
||||
#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined (__UINT_FAST64_TYPE__)) )) && !defined (_PSTDINT_H_INCLUDED)
|
||||
#include <stdint.h>
|
||||
#define _PSTDINT_H_INCLUDED
|
||||
# ifndef PRINTF_INT64_MODIFIER
|
||||
# define PRINTF_INT64_MODIFIER "ll"
|
||||
# endif
|
||||
# ifndef PRINTF_INT32_MODIFIER
|
||||
# define PRINTF_INT32_MODIFIER "l"
|
||||
# endif
|
||||
# ifndef PRINTF_INT16_MODIFIER
|
||||
# define PRINTF_INT16_MODIFIER "h"
|
||||
# endif
|
||||
# ifndef PRINTF_INTMAX_MODIFIER
|
||||
# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
|
||||
# endif
|
||||
# ifndef PRINTF_INT64_HEX_WIDTH
|
||||
# define PRINTF_INT64_HEX_WIDTH "16"
|
||||
# endif
|
||||
# ifndef PRINTF_INT32_HEX_WIDTH
|
||||
# define PRINTF_INT32_HEX_WIDTH "8"
|
||||
# endif
|
||||
# ifndef PRINTF_INT16_HEX_WIDTH
|
||||
# define PRINTF_INT16_HEX_WIDTH "4"
|
||||
# endif
|
||||
# ifndef PRINTF_INT8_HEX_WIDTH
|
||||
# define PRINTF_INT8_HEX_WIDTH "2"
|
||||
# endif
|
||||
# ifndef PRINTF_INT64_DEC_WIDTH
|
||||
# define PRINTF_INT64_DEC_WIDTH "20"
|
||||
# endif
|
||||
# ifndef PRINTF_INT32_DEC_WIDTH
|
||||
# define PRINTF_INT32_DEC_WIDTH "10"
|
||||
# endif
|
||||
# ifndef PRINTF_INT16_DEC_WIDTH
|
||||
# define PRINTF_INT16_DEC_WIDTH "5"
|
||||
# endif
|
||||
# ifndef PRINTF_INT8_DEC_WIDTH
|
||||
# define PRINTF_INT8_DEC_WIDTH "3"
|
||||
# endif
|
||||
# ifndef PRINTF_INTMAX_HEX_WIDTH
|
||||
# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
|
||||
# endif
|
||||
# ifndef PRINTF_INTMAX_DEC_WIDTH
|
||||
# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
|
||||
# endif
|
||||
|
||||
/*
|
||||
* Something really weird is going on with Open Watcom. Just pull some of
|
||||
* these duplicated definitions from Open Watcom's stdint.h file for now.
|
||||
*/
|
||||
|
||||
# if defined (__WATCOMC__) && __WATCOMC__ >= 1250
|
||||
# if !defined (INT64_C)
|
||||
# define INT64_C(x) (x + (INT64_MAX - INT64_MAX))
|
||||
# endif
|
||||
# if !defined (UINT64_C)
|
||||
# define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX))
|
||||
# endif
|
||||
# if !defined (INT32_C)
|
||||
# define INT32_C(x) (x + (INT32_MAX - INT32_MAX))
|
||||
# endif
|
||||
# if !defined (UINT32_C)
|
||||
# define UINT32_C(x) (x + (UINT32_MAX - UINT32_MAX))
|
||||
# endif
|
||||
# if !defined (INT16_C)
|
||||
# define INT16_C(x) (x)
|
||||
# endif
|
||||
# if !defined (UINT16_C)
|
||||
# define UINT16_C(x) (x)
|
||||
# endif
|
||||
# if !defined (INT8_C)
|
||||
# define INT8_C(x) (x)
|
||||
# endif
|
||||
# if !defined (UINT8_C)
|
||||
# define UINT8_C(x) (x)
|
||||
# endif
|
||||
# if !defined (UINT64_MAX)
|
||||
# define UINT64_MAX 18446744073709551615ULL
|
||||
# endif
|
||||
# if !defined (INT64_MAX)
|
||||
# define INT64_MAX 9223372036854775807LL
|
||||
# endif
|
||||
# if !defined (UINT32_MAX)
|
||||
# define UINT32_MAX 4294967295UL
|
||||
# endif
|
||||
# if !defined (INT32_MAX)
|
||||
# define INT32_MAX 2147483647L
|
||||
# endif
|
||||
# if !defined (INTMAX_MAX)
|
||||
# define INTMAX_MAX INT64_MAX
|
||||
# endif
|
||||
# if !defined (INTMAX_MIN)
|
||||
# define INTMAX_MIN INT64_MIN
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef _PSTDINT_H_INCLUDED
|
||||
#define _PSTDINT_H_INCLUDED
|
||||
|
||||
#ifndef SIZE_MAX
|
||||
# define SIZE_MAX (~(size_t)0)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Deduce the type assignments from limits.h under the assumption that
|
||||
* integer sizes in bits are powers of 2, and follow the ANSI
|
||||
* definitions.
|
||||
*/
|
||||
|
||||
#ifndef UINT8_MAX
|
||||
# define UINT8_MAX 0xff
|
||||
#endif
|
||||
#ifndef uint8_t
|
||||
# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S)
|
||||
typedef unsigned char uint8_t;
|
||||
# define UINT8_C(v) ((uint8_t) v)
|
||||
# else
|
||||
# error "Platform not supported"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef INT8_MAX
|
||||
# define INT8_MAX 0x7f
|
||||
#endif
|
||||
#ifndef INT8_MIN
|
||||
# define INT8_MIN INT8_C(0x80)
|
||||
#endif
|
||||
#ifndef int8_t
|
||||
# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S)
|
||||
typedef signed char int8_t;
|
||||
# define INT8_C(v) ((int8_t) v)
|
||||
# else
|
||||
# error "Platform not supported"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef UINT16_MAX
|
||||
# define UINT16_MAX 0xffff
|
||||
#endif
|
||||
#ifndef uint16_t
|
||||
#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S)
|
||||
typedef unsigned int uint16_t;
|
||||
# ifndef PRINTF_INT16_MODIFIER
|
||||
# define PRINTF_INT16_MODIFIER ""
|
||||
# endif
|
||||
# define UINT16_C(v) ((uint16_t) (v))
|
||||
#elif (USHRT_MAX == UINT16_MAX)
|
||||
typedef unsigned short uint16_t;
|
||||
# define UINT16_C(v) ((uint16_t) (v))
|
||||
# ifndef PRINTF_INT16_MODIFIER
|
||||
# define PRINTF_INT16_MODIFIER "h"
|
||||
# endif
|
||||
#else
|
||||
#error "Platform not supported"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef INT16_MAX
|
||||
# define INT16_MAX 0x7fff
|
||||
#endif
|
||||
#ifndef INT16_MIN
|
||||
# define INT16_MIN INT16_C(0x8000)
|
||||
#endif
|
||||
#ifndef int16_t
|
||||
#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S)
|
||||
typedef signed int int16_t;
|
||||
# define INT16_C(v) ((int16_t) (v))
|
||||
# ifndef PRINTF_INT16_MODIFIER
|
||||
# define PRINTF_INT16_MODIFIER ""
|
||||
# endif
|
||||
#elif (SHRT_MAX == INT16_MAX)
|
||||
typedef signed short int16_t;
|
||||
# define INT16_C(v) ((int16_t) (v))
|
||||
# ifndef PRINTF_INT16_MODIFIER
|
||||
# define PRINTF_INT16_MODIFIER "h"
|
||||
# endif
|
||||
#else
|
||||
#error "Platform not supported"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef UINT32_MAX
|
||||
# define UINT32_MAX (0xffffffffUL)
|
||||
#endif
|
||||
#ifndef uint32_t
|
||||
#if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S)
|
||||
typedef unsigned long uint32_t;
|
||||
# define UINT32_C(v) v ## UL
|
||||
# ifndef PRINTF_INT32_MODIFIER
|
||||
# define PRINTF_INT32_MODIFIER "l"
|
||||
# endif
|
||||
#elif (UINT_MAX == UINT32_MAX)
|
||||
typedef unsigned int uint32_t;
|
||||
# ifndef PRINTF_INT32_MODIFIER
|
||||
# define PRINTF_INT32_MODIFIER ""
|
||||
# endif
|
||||
# define UINT32_C(v) v ## U
|
||||
#elif (USHRT_MAX == UINT32_MAX)
|
||||
typedef unsigned short uint32_t;
|
||||
# define UINT32_C(v) ((unsigned short) (v))
|
||||
# ifndef PRINTF_INT32_MODIFIER
|
||||
# define PRINTF_INT32_MODIFIER ""
|
||||
# endif
|
||||
#else
|
||||
#error "Platform not supported"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
 * Signed 32-bit support: the limits, the type, the constant macro and the
 * printf length modifier. The type is picked by comparing the limits.h
 * maxima of long, int and short against INT32_MAX, in that order.
 */
#ifndef INT32_MAX
# define INT32_MAX (0x7fffffffL)
#endif
#ifndef INT32_MIN
/* Written as -MAX - 1: the literal 0x80000000 does not fit a signed 32-bit
 * type, so it takes an unsigned (or wider) type and would compare as a
 * positive value instead of the most negative one. */
# define INT32_MIN (-INT32_MAX - 1)
#endif
#ifndef int32_t
#if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S)
  typedef signed long int32_t;
# define INT32_C(v) v ## L
# ifndef PRINTF_INT32_MODIFIER
#  define PRINTF_INT32_MODIFIER "l"
# endif
#elif (INT_MAX == INT32_MAX)
  typedef signed int int32_t;
# define INT32_C(v) v
# ifndef PRINTF_INT32_MODIFIER
#  define PRINTF_INT32_MODIFIER ""
# endif
#elif (SHRT_MAX == INT32_MAX)
  typedef signed short int32_t;
# define INT32_C(v) ((short) (v))
# ifndef PRINTF_INT32_MODIFIER
#  define PRINTF_INT32_MODIFIER ""
# endif
#else
#error "Platform not supported"
#endif
#endif
|
||||
|
||||
/*
|
||||
* The macro stdint_int64_defined is temporarily used to record
|
||||
* whether or not 64 integer support is available. It must be
|
||||
* defined for any 64 integer extensions for new platforms that are
|
||||
* added.
|
||||
*/
|
||||
|
||||
#undef stdint_int64_defined
|
||||
#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S)
|
||||
# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined (S_SPLINT_S)
|
||||
# define stdint_int64_defined
|
||||
typedef long long int64_t;
|
||||
typedef unsigned long long uint64_t;
|
||||
# define UINT64_C(v) v ## ULL
|
||||
# define INT64_C(v) v ## LL
|
||||
# ifndef PRINTF_INT64_MODIFIER
|
||||
# define PRINTF_INT64_MODIFIER "ll"
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if !defined (stdint_int64_defined)
|
||||
# if defined(__GNUC__)
|
||||
# define stdint_int64_defined
|
||||
__extension__ typedef long long int64_t;
|
||||
__extension__ typedef unsigned long long uint64_t;
|
||||
# define UINT64_C(v) v ## ULL
|
||||
# define INT64_C(v) v ## LL
|
||||
# ifndef PRINTF_INT64_MODIFIER
|
||||
# define PRINTF_INT64_MODIFIER "ll"
|
||||
# endif
|
||||
# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S)
|
||||
# define stdint_int64_defined
|
||||
typedef long long int64_t;
|
||||
typedef unsigned long long uint64_t;
|
||||
# define UINT64_C(v) v ## ULL
|
||||
# define INT64_C(v) v ## LL
|
||||
# ifndef PRINTF_INT64_MODIFIER
|
||||
# define PRINTF_INT64_MODIFIER "ll"
|
||||
# endif
|
||||
# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC)
|
||||
# define stdint_int64_defined
|
||||
typedef __int64 int64_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
# define UINT64_C(v) v ## UI64
|
||||
# define INT64_C(v) v ## I64
|
||||
# ifndef PRINTF_INT64_MODIFIER
|
||||
# define PRINTF_INT64_MODIFIER "I64"
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
 * 64-bit limit macros. Each one is only defined when it is still missing and
 * the constant macro it expands through (INT64_C / UINT64_C) exists, so that
 * `#ifdef` tests on these names stay meaningful on platforms without 64-bit
 * integer support.
 */
#if !defined (LONG_LONG_MAX) && defined (INT64_C)
# define LONG_LONG_MAX INT64_C (9223372036854775807)
#endif
#if !defined (ULONG_LONG_MAX) && defined (UINT64_C)
# define ULONG_LONG_MAX UINT64_C (18446744073709551615)
#endif

#if !defined (INT64_MAX) && defined (INT64_C)
# define INT64_MAX INT64_C (9223372036854775807)
#endif
#if !defined (INT64_MIN) && defined (INT64_C)
/* Written as -MAX - 1: 9223372036854775808 on its own is out of range for a
 * signed 64-bit literal, so negating it does not portably give the minimum. */
# define INT64_MIN (-INT64_C (9223372036854775807) - 1)
#endif
#if !defined (UINT64_MAX) && defined (UINT64_C)
# define UINT64_MAX UINT64_C (18446744073709551615)
#endif
|
||||
|
||||
/*
|
||||
* Width of hexadecimal for number field.
|
||||
*/
|
||||
|
||||
#ifndef PRINTF_INT64_HEX_WIDTH
|
||||
# define PRINTF_INT64_HEX_WIDTH "16"
|
||||
#endif
|
||||
#ifndef PRINTF_INT32_HEX_WIDTH
|
||||
# define PRINTF_INT32_HEX_WIDTH "8"
|
||||
#endif
|
||||
#ifndef PRINTF_INT16_HEX_WIDTH
|
||||
# define PRINTF_INT16_HEX_WIDTH "4"
|
||||
#endif
|
||||
#ifndef PRINTF_INT8_HEX_WIDTH
|
||||
# define PRINTF_INT8_HEX_WIDTH "2"
|
||||
#endif
|
||||
|
||||
#ifndef PRINTF_INT64_DEC_WIDTH
|
||||
# define PRINTF_INT64_DEC_WIDTH "20"
|
||||
#endif
|
||||
#ifndef PRINTF_INT32_DEC_WIDTH
|
||||
# define PRINTF_INT32_DEC_WIDTH "10"
|
||||
#endif
|
||||
#ifndef PRINTF_INT16_DEC_WIDTH
|
||||
# define PRINTF_INT16_DEC_WIDTH "5"
|
||||
#endif
|
||||
#ifndef PRINTF_INT8_DEC_WIDTH
|
||||
# define PRINTF_INT8_DEC_WIDTH "3"
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Ok, lets not worry about 128 bit integers for now. Moore's law says
|
||||
* we don't need to worry about that until about 2040 at which point
|
||||
* we'll have bigger things to worry about.
|
||||
*/
|
||||
|
||||
#ifdef stdint_int64_defined
|
||||
typedef int64_t intmax_t;
|
||||
typedef uint64_t uintmax_t;
|
||||
# define INTMAX_MAX INT64_MAX
|
||||
# define INTMAX_MIN INT64_MIN
|
||||
# define UINTMAX_MAX UINT64_MAX
|
||||
# define UINTMAX_C(v) UINT64_C(v)
|
||||
# define INTMAX_C(v) INT64_C(v)
|
||||
# ifndef PRINTF_INTMAX_MODIFIER
|
||||
# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
|
||||
# endif
|
||||
# ifndef PRINTF_INTMAX_HEX_WIDTH
|
||||
# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
|
||||
# endif
|
||||
# ifndef PRINTF_INTMAX_DEC_WIDTH
|
||||
# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
|
||||
# endif
|
||||
#else
|
||||
typedef int32_t intmax_t;
|
||||
typedef uint32_t uintmax_t;
|
||||
# define INTMAX_MAX INT32_MAX
|
||||
# define UINTMAX_MAX UINT32_MAX
|
||||
# define UINTMAX_C(v) UINT32_C(v)
|
||||
# define INTMAX_C(v) INT32_C(v)
|
||||
# ifndef PRINTF_INTMAX_MODIFIER
|
||||
# define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER
|
||||
# endif
|
||||
# ifndef PRINTF_INTMAX_HEX_WIDTH
|
||||
# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH
|
||||
# endif
|
||||
# ifndef PRINTF_INTMAX_DEC_WIDTH
|
||||
# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Because this file currently only supports platforms which have
|
||||
* precise powers of 2 as bit sizes for the default integers, the
|
||||
* least definitions are all trivial. Its possible that a future
|
||||
* version of this file could have different definitions.
|
||||
*/
|
||||
|
||||
#ifndef stdint_least_defined
|
||||
typedef int8_t int_least8_t;
|
||||
typedef uint8_t uint_least8_t;
|
||||
typedef int16_t int_least16_t;
|
||||
typedef uint16_t uint_least16_t;
|
||||
typedef int32_t int_least32_t;
|
||||
typedef uint32_t uint_least32_t;
|
||||
# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER
|
||||
# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER
|
||||
# define UINT_LEAST8_MAX UINT8_MAX
|
||||
# define INT_LEAST8_MAX INT8_MAX
|
||||
# define UINT_LEAST16_MAX UINT16_MAX
|
||||
# define INT_LEAST16_MAX INT16_MAX
|
||||
# define UINT_LEAST32_MAX UINT32_MAX
|
||||
# define INT_LEAST32_MAX INT32_MAX
|
||||
# define INT_LEAST8_MIN INT8_MIN
|
||||
# define INT_LEAST16_MIN INT16_MIN
|
||||
# define INT_LEAST32_MIN INT32_MIN
|
||||
# ifdef stdint_int64_defined
|
||||
typedef int64_t int_least64_t;
|
||||
typedef uint64_t uint_least64_t;
|
||||
# define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER
|
||||
# define UINT_LEAST64_MAX UINT64_MAX
|
||||
# define INT_LEAST64_MAX INT64_MAX
|
||||
# define INT_LEAST64_MIN INT64_MIN
|
||||
# endif
|
||||
#endif
|
||||
#undef stdint_least_defined
|
||||
|
||||
/*
|
||||
* The ANSI C committee pretending to know or specify anything about
|
||||
* performance is the epitome of misguided arrogance. The mandate of
|
||||
* this file is to *ONLY* ever support that absolute minimum
|
||||
* definition of the fast integer types, for compatibility purposes.
|
||||
* No extensions, and no attempt to suggest what may or may not be a
|
||||
* faster integer type will ever be made in this file. Developers are
|
||||
* warned to stay away from these types when using this or any other
|
||||
* stdint.h.
|
||||
*/
|
||||
|
||||
typedef int_least8_t int_fast8_t;
|
||||
typedef uint_least8_t uint_fast8_t;
|
||||
typedef int_least16_t int_fast16_t;
|
||||
typedef uint_least16_t uint_fast16_t;
|
||||
typedef int_least32_t int_fast32_t;
|
||||
typedef uint_least32_t uint_fast32_t;
|
||||
#define UINT_FAST8_MAX UINT_LEAST8_MAX
|
||||
#define INT_FAST8_MAX INT_LEAST8_MAX
|
||||
#define UINT_FAST16_MAX UINT_LEAST16_MAX
|
||||
#define INT_FAST16_MAX INT_LEAST16_MAX
|
||||
#define UINT_FAST32_MAX UINT_LEAST32_MAX
|
||||
#define INT_FAST32_MAX INT_LEAST32_MAX
|
||||
#define INT_FAST8_MIN INT_LEAST8_MIN
|
||||
#define INT_FAST16_MIN INT_LEAST16_MIN
|
||||
#define INT_FAST32_MIN INT_LEAST32_MIN
|
||||
#ifdef stdint_int64_defined
|
||||
typedef int_least64_t int_fast64_t;
|
||||
typedef uint_least64_t uint_fast64_t;
|
||||
# define UINT_FAST64_MAX UINT_LEAST64_MAX
|
||||
# define INT_FAST64_MAX INT_LEAST64_MAX
|
||||
# define INT_FAST64_MIN INT_LEAST64_MIN
|
||||
#endif
|
||||
|
||||
#undef stdint_int64_defined
|
||||
|
||||
/*
|
||||
* Whatever piecemeal, per compiler thing we can do about the wchar_t
|
||||
* type limits.
|
||||
*/
|
||||
|
||||
#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__)
|
||||
# include <wchar.h>
|
||||
# ifndef WCHAR_MIN
|
||||
# define WCHAR_MIN 0
|
||||
# endif
|
||||
# ifndef WCHAR_MAX
|
||||
# define WCHAR_MAX ((wchar_t)-1)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Whatever piecemeal, per compiler/platform thing we can do about the
|
||||
* (u)intptr_t types and limits.
|
||||
*/
|
||||
|
||||
#if defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED)
|
||||
# define STDINT_H_UINTPTR_T_DEFINED
|
||||
#endif
|
||||
|
||||
#ifndef STDINT_H_UINTPTR_T_DEFINED
|
||||
# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64)
|
||||
# define stdint_intptr_bits 64
|
||||
# elif defined (__WATCOMC__) || defined (__TURBOC__)
|
||||
# if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__)
|
||||
# define stdint_intptr_bits 16
|
||||
# else
|
||||
# define stdint_intptr_bits 32
|
||||
# endif
|
||||
# elif defined (__i386__) || defined (_WIN32) || defined (WIN32)
|
||||
# define stdint_intptr_bits 32
|
||||
# elif defined (__INTEL_COMPILER)
|
||||
/* TODO -- what did Intel do about x86-64? */
|
||||
# endif
|
||||
|
||||
# ifdef stdint_intptr_bits
|
||||
# define stdint_intptr_glue3_i(a,b,c) a##b##c
|
||||
# define stdint_intptr_glue3(a,b,c) stdint_intptr_glue3_i(a,b,c)
|
||||
# ifndef PRINTF_INTPTR_MODIFIER
|
||||
# define PRINTF_INTPTR_MODIFIER stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER)
|
||||
# endif
|
||||
# ifndef PTRDIFF_MAX
|
||||
# define PTRDIFF_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
|
||||
# endif
|
||||
# ifndef PTRDIFF_MIN
|
||||
# define PTRDIFF_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
|
||||
# endif
|
||||
# ifndef UINTPTR_MAX
|
||||
# define UINTPTR_MAX stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX)
|
||||
# endif
|
||||
# ifndef INTPTR_MAX
|
||||
# define INTPTR_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
|
||||
# endif
|
||||
# ifndef INTPTR_MIN
|
||||
# define INTPTR_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
|
||||
# endif
|
||||
# ifndef INTPTR_C
|
||||
# define INTPTR_C(x) stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x)
|
||||
# endif
|
||||
# ifndef UINTPTR_C
|
||||
# define UINTPTR_C(x) stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x)
|
||||
# endif
|
||||
typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t;
|
||||
typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t) intptr_t;
|
||||
# else
|
||||
/* TODO -- This following is likely wrong for some platforms, and does
|
||||
nothing for the definition of uintptr_t. */
|
||||
typedef ptrdiff_t intptr_t;
|
||||
# endif
|
||||
# define STDINT_H_UINTPTR_T_DEFINED
|
||||
#endif
|
||||
|
||||
/*
 *  Assumes sig_atomic_t is a signed type.
 *
 *  The maximum is assembled as ((2^(N-2) - 1) << 1) + 1, where N is the width
 *  in bits, so that no intermediate value leaves the range of sig_atomic_t.
 *  Shifting 1 into the sign bit and then subtracting 1 overflows a signed
 *  type, which is undefined behavior in C and is rejected when the macro is
 *  used in a C++ constant expression.
 */

#ifndef SIG_ATOMIC_MAX
# define SIG_ATOMIC_MAX ((((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-2)) - 1) << 1) + 1)
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined (__TEST_PSTDINT_FOR_CORRECTNESS)
|
||||
|
||||
/*
|
||||
* Please compile with the maximum warning settings to make sure macros are not
|
||||
* defined more than once.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Two-level paste so that macro arguments are expanded before being glued together. */
#define glue3_aux(x,y,z) x ## y ## z
#define glue3(x,y,z) glue3_aux(x,y,z)

/*
 * Declare a zero-initialized variable named u<bits> / i<bits> of the matching
 * exact-width type. Only identifiers are pasted; operators such as '=' are
 * written outside glue3() because pasting an identifier with an operator does
 * not yield a valid preprocessing token.
 */
#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,) = glue3(UINT,bits,_C) (0);
#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,) = glue3(INT,bits,_C) (0);

/* DECL(U,8) -> DECLU(8), DECL(I,8) -> DECLI(8). */
#define DECL(us,bits) glue3(DECL,us,) (bits)

/* Set u<bits> to all ones and report when that differs from UINT<bits>_MAX. */
#define TESTUMAX(bits) glue3(u,bits,) = ~glue3(u,bits,); if (glue3(UINT,bits,_MAX) != glue3(u,bits,)) printf ("Something wrong with UINT%d_MAX\n", bits)
|
||||
|
||||
int main () {
|
||||
DECL(I,8)
|
||||
DECL(U,8)
|
||||
DECL(I,16)
|
||||
DECL(U,16)
|
||||
DECL(I,32)
|
||||
DECL(U,32)
|
||||
#ifdef INT64_MAX
|
||||
DECL(I,64)
|
||||
DECL(U,64)
|
||||
#endif
|
||||
intmax_t imax = INTMAX_C(0);
|
||||
uintmax_t umax = UINTMAX_C(0);
|
||||
char str0[256], str1[256];
|
||||
|
||||
sprintf (str0, "%d %x\n", 0, ~0);
|
||||
|
||||
sprintf (str1, "%d %x\n", i8, ~0);
|
||||
if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1);
|
||||
sprintf (str1, "%u %x\n", u8, ~0);
|
||||
if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1);
|
||||
sprintf (str1, "%d %x\n", i16, ~0);
|
||||
if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1);
|
||||
sprintf (str1, "%u %x\n", u16, ~0);
|
||||
if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1);
|
||||
sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n", i32, ~0);
|
||||
if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1);
|
||||
sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n", u32, ~0);
|
||||
if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1);
|
||||
#ifdef INT64_MAX
|
||||
sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n", i64, ~0);
|
||||
if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1);
|
||||
#endif
|
||||
sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n", imax, ~0);
|
||||
if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1);
|
||||
sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n", umax, ~0);
|
||||
if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1);
|
||||
|
||||
TESTUMAX(8);
|
||||
TESTUMAX(16);
|
||||
TESTUMAX(32);
|
||||
#ifdef INT64_MAX
|
||||
TESTUMAX(64);
|
||||
#endif
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
#endif
|
53
license.txt
Normal file
53
license.txt
Normal file
@ -0,0 +1,53 @@
|
||||
|
||||
Original HLSLcc source code Copyright (c) 2012 James Jones
|
||||
Further improvements Copyright (c) 2014-2016 Unity Technologies
|
||||
All Rights Reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included
|
||||
in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
|
||||
This software makes use of the bstring library which is provided under the following license:
|
||||
|
||||
Copyright (c) 2002-2008 Paul Hsieh
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
Neither the name of bstrlib nor the names of its contributors may be used
|
||||
to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
824
src/ControlFlowGraph.cpp
Normal file
824
src/ControlFlowGraph.cpp
Normal file
@ -0,0 +1,824 @@
|
||||
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/ControlFlowGraph.h"
|
||||
#include "internal_includes/ControlFlowGraphUtils.h"
|
||||
#include "internal_includes/Instruction.h"
|
||||
#include "internal_includes/Operand.h"
|
||||
#include "internal_includes/HLSLccToolkit.h"
|
||||
#include <algorithm>
|
||||
|
||||
using namespace HLSLcc::ControlFlow;
|
||||
using HLSLcc::ForEachOperand;
|
||||
|
||||
// Rebuilds the whole graph starting at firstInstruction and returns the root block.
// Any blocks left over from a previous call are discarded first.
const BasicBlock &ControlFlowGraph::Build(const Instruction *firstInstruction)
{
    m_BlockMap.clear();
    m_BlockStorage.clear();

    // The BasicBlock constructor stores the new block in m_BlockStorage, so the graph owns it
    // and no delete is needed here. Constructing the root also creates its successor blocks.
    BasicBlock *entryBlock = new BasicBlock(Utils::GetNextNonLabelInstruction(firstInstruction), *this, NULL);

    // Recompute the reachable set of every block until a full pass changes nothing.
    for (;;)
    {
        bool anyBlockChanged = false;
        for (const auto &blockPtr : m_BlockStorage)
        {
            // Every block is visited on each pass, even after a change has been seen.
            if (blockPtr->RebuildReachable())
                anyBlockChanged = true;
        }

        if (!anyBlockChanged)
            break;
    }

    return *entryBlock;
}
|
||||
|
||||
// Looks up the block whose head is the first non-label instruction at or after `instruction`.
// Returns NULL when no block starts there.
const BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *instruction) const
{
    const Instruction *blockHead = Utils::GetNextNonLabelInstruction(instruction);
    BasicBlockMap::const_iterator found = m_BlockMap.find(blockHead);

    if (found != m_BlockMap.end())
        return found->second;

    return NULL;
}
|
||||
|
||||
// Mutable overload: looks up the block whose head is the first non-label instruction at or
// after `instruction`. Returns NULL when no block starts there.
BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *instruction)
{
    const Instruction *blockHead = Utils::GetNextNonLabelInstruction(instruction);
    BasicBlockMap::iterator found = m_BlockMap.find(blockHead);

    if (found != m_BlockMap.end())
        return found->second;

    return NULL;
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build().
|
||||
// Auto-registers itself into ControlFlowGraph
|
||||
BasicBlock::BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead)
|
||||
: m_Graph(graph)
|
||||
, m_First(psFirst)
|
||||
, m_Last(NULL)
|
||||
{
|
||||
m_UEVar.clear();
|
||||
m_VarKill.clear();
|
||||
m_Preceding.clear();
|
||||
m_Succeeding.clear();
|
||||
m_DEDef.clear();
|
||||
m_Reachable.clear();
|
||||
|
||||
// Check that we've pruned the labels
|
||||
ASSERT(psFirst == Utils::GetNextNonLabelInstruction(psFirst));
|
||||
|
||||
// Insert to block storage, block map and connect to previous block
|
||||
m_Graph.m_BlockStorage.push_back(shared_ptr<BasicBlock>(this));
|
||||
|
||||
bool didInsert = m_Graph.m_BlockMap.insert(std::make_pair(psFirst, this)).second;
|
||||
ASSERT(didInsert);
|
||||
|
||||
if (psPrecedingBlockHead != NULL)
|
||||
{
|
||||
m_Preceding.insert(psPrecedingBlockHead);
|
||||
BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(psPrecedingBlockHead);
|
||||
ASSERT(prec != 0);
|
||||
didInsert = prec->m_Succeeding.insert(psFirst).second;
|
||||
ASSERT(didInsert);
|
||||
}
|
||||
|
||||
Build();
|
||||
}
|
||||
|
||||
void BasicBlock::Build()
|
||||
{
|
||||
const Instruction *inst = m_First;
|
||||
while (1)
|
||||
{
|
||||
// Process sources first
|
||||
ForEachOperand(inst, inst+1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND,
|
||||
[this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
|
||||
{
|
||||
if (psOperand->eType != OPERAND_TYPE_TEMP)
|
||||
return;
|
||||
|
||||
uint32_t tempReg = psOperand->ui32RegisterNumber;
|
||||
uint32_t accessMask = psOperand->GetAccessMask();
|
||||
|
||||
// Go through each component
|
||||
for (int k = 0; k < 4; k++)
|
||||
{
|
||||
if (!(accessMask & (1 << k)))
|
||||
continue;
|
||||
|
||||
uint32_t regIdx = tempReg * 4 + k;
|
||||
// Is this idx already in the kill set, meaning that it's already been re-defined in this basic block? Ignore
|
||||
if (m_VarKill.find(regIdx) != m_VarKill.end())
|
||||
continue;
|
||||
|
||||
// Add to UEVars set. Doesn't matter if it's already there.
|
||||
m_UEVar.insert(regIdx);
|
||||
}
|
||||
return;
|
||||
});
|
||||
|
||||
// Then the destination operands
|
||||
ForEachOperand(inst, inst+1, FEO_FLAG_DEST_OPERAND,
|
||||
[this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
|
||||
{
|
||||
if (psOperand->eType != OPERAND_TYPE_TEMP)
|
||||
return;
|
||||
|
||||
uint32_t tempReg = psOperand->ui32RegisterNumber;
|
||||
uint32_t accessMask = psOperand->GetAccessMask();
|
||||
|
||||
// Go through each component
|
||||
for (int k = 0; k < 4; k++)
|
||||
{
|
||||
if (!(accessMask & (1 << k)))
|
||||
continue;
|
||||
|
||||
uint32_t regIdx = tempReg * 4 + k;
|
||||
|
||||
// Add to kill set. Dupes are fine, this is a set.
|
||||
m_VarKill.insert(regIdx);
|
||||
// Also into the downward definitions. Overwrite the previous definition in this basic block, if any
|
||||
Definition d(psInst, psOperand);
|
||||
m_DEDef[regIdx].clear();
|
||||
m_DEDef[regIdx].insert(d);
|
||||
}
|
||||
return;
|
||||
});
|
||||
|
||||
// Check for flow control instructions
|
||||
bool blockDone = false;
|
||||
switch (inst->eOpcode)
|
||||
{
|
||||
default:
|
||||
break;
|
||||
case OPCODE_RET:
|
||||
blockDone = true;
|
||||
break;
|
||||
case OPCODE_RETC:
|
||||
// Basic block is done, start a next one.
|
||||
// There REALLY should be no existing blocks for this one
|
||||
ASSERT(m_Graph.GetBasicBlockForInstruction(Utils::GetNextNonLabelInstruction(inst+1)) == NULL);
|
||||
AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1));
|
||||
blockDone = true;
|
||||
break;
|
||||
case OPCODE_LOOP:
|
||||
case OPCODE_CASE:
|
||||
case OPCODE_ENDIF:
|
||||
case OPCODE_ENDSWITCH:
|
||||
// Not a flow control branch, but need to start a new block anyway.
|
||||
AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1));
|
||||
blockDone = true;
|
||||
break;
|
||||
|
||||
// Branches
|
||||
case OPCODE_IF:
|
||||
case OPCODE_BREAKC:
|
||||
case OPCODE_CONTINUEC:
|
||||
{
|
||||
const Instruction *jumpPoint = Utils::GetJumpPoint(inst);
|
||||
ASSERT(jumpPoint != NULL);
|
||||
|
||||
// The control branches to the next instruction or jumps to jumpPoint
|
||||
AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst+1));
|
||||
AddChildBasicBlock(jumpPoint);
|
||||
|
||||
blockDone = true;
|
||||
break;
|
||||
}
|
||||
case OPCODE_SWITCH:
|
||||
{
|
||||
bool sawEndSwitch = false;
|
||||
bool needConnectToParent = false;
|
||||
const Instruction *jumpPoint = Utils::GetJumpPoint(inst, &sawEndSwitch, &needConnectToParent);
|
||||
ASSERT(jumpPoint != NULL);
|
||||
|
||||
while (1)
|
||||
{
|
||||
if(!sawEndSwitch || needConnectToParent)
|
||||
AddChildBasicBlock(jumpPoint);
|
||||
|
||||
if (sawEndSwitch)
|
||||
break;
|
||||
|
||||
// The -1 is a bit of a hack: we always scroll past all labels so rewind to the last one so we'll know to search for the next label
|
||||
ASSERT((jumpPoint - 1)->eOpcode == OPCODE_CASE || (jumpPoint - 1)->eOpcode == OPCODE_DEFAULT);
|
||||
jumpPoint = Utils::GetJumpPoint(jumpPoint-1, &sawEndSwitch, &needConnectToParent);
|
||||
ASSERT(jumpPoint != NULL);
|
||||
}
|
||||
blockDone = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// Non-conditional jumps
|
||||
case OPCODE_BREAK:
|
||||
case OPCODE_ELSE:
|
||||
case OPCODE_CONTINUE:
|
||||
case OPCODE_ENDLOOP:
|
||||
{
|
||||
const Instruction *jumpPoint = Utils::GetJumpPoint(inst);
|
||||
ASSERT(jumpPoint != NULL);
|
||||
|
||||
AddChildBasicBlock(jumpPoint);
|
||||
|
||||
blockDone = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (blockDone)
|
||||
break;
|
||||
|
||||
inst++;
|
||||
}
|
||||
// In initial building phase, just make m_Reachable equal to m_DEDef
|
||||
m_Reachable = m_DEDef;
|
||||
|
||||
// Tag the end of the basic block
|
||||
m_Last = inst;
|
||||
// printf("Basic Block %d -> %d\n", (int)m_First->id, (int)m_Last->id);
|
||||
}
|
||||
|
||||
|
||||
// Makes the block headed by psFirst a successor of this block and returns it.
// A block that already exists is only linked; otherwise a new one is created.
BasicBlock * BasicBlock::AddChildBasicBlock(const Instruction *psFirst)
{
    BasicBlock *existing = m_Graph.GetBasicBlockForInstruction(psFirst);
    if (existing == NULL)
    {
        // The constructor registers the new block with the graph and links it to this block.
        return new BasicBlock(psFirst, m_Graph, m_First);
    }

    // Already known: record the edge in both directions.
    m_Succeeding.insert(psFirst);
    existing->m_Preceding.insert(m_First);
    return existing;
}
|
||||
|
||||
bool BasicBlock::RebuildReachable()
|
||||
{
|
||||
// Building the Reachable set is an iterative process, where each block gets rebuilt until nothing changes.
|
||||
// Formula: reachable = this.DEDef union ( each preceding.Reachable() minus this.VarKill())
|
||||
|
||||
ReachableVariables newReachable = m_DEDef;
|
||||
bool hasChanges = false;
|
||||
|
||||
// Loop each predecessor
|
||||
std::for_each(Preceding().begin(), Preceding().end(), [&](const Instruction *instr)
|
||||
{
|
||||
const BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(instr);
|
||||
const ReachableVariables &precReachable = prec->Reachable();
|
||||
|
||||
// Loop each variable*component
|
||||
std::for_each(precReachable.begin(), precReachable.end(), [&](const std::pair<uint32_t, BasicBlock::ReachableDefinitionsPerVariable> &itr2)
|
||||
{
|
||||
uint32_t regIdx = itr2.first;
|
||||
const BasicBlock::ReachableDefinitionsPerVariable &defs = itr2.second;
|
||||
|
||||
// Already killed in this block?
|
||||
if (VarKill().find(regIdx) != VarKill().end())
|
||||
return;
|
||||
|
||||
// Only do comparisons against current definitions if we've yet to find any changes
|
||||
BasicBlock::ReachableDefinitionsPerVariable *currReachablePerVar = 0;
|
||||
if (!hasChanges)
|
||||
currReachablePerVar = &m_Reachable[regIdx];
|
||||
|
||||
BasicBlock::ReachableDefinitionsPerVariable &newReachablePerVar = newReachable[regIdx];
|
||||
|
||||
// Loop each definition
|
||||
std::for_each(defs.begin(), defs.end(), [&](const BasicBlock::Definition &d)
|
||||
{
|
||||
if (!hasChanges)
|
||||
{
|
||||
// Check if already there
|
||||
if (currReachablePerVar->find(d) == currReachablePerVar->end())
|
||||
hasChanges = true;
|
||||
}
|
||||
newReachablePerVar.insert(d);
|
||||
}); // definition
|
||||
|
||||
}); // variable*component
|
||||
}); // predecessor
|
||||
|
||||
if (hasChanges)
|
||||
{
|
||||
std::swap(m_Reachable, newReachable);
|
||||
}
|
||||
|
||||
return hasChanges;
|
||||
}
|
||||
|
||||
// Merges b into a: for every variable component in b, all of its definitions are added to
// a's set for that component, creating the set in a when it does not exist yet.
void BasicBlock::RVarUnion(ReachableVariables &a, const ReachableVariables &b)
{
    // Iterate by the map's own value_type: naming the element as pair<uint32_t, ...>
    // (non-const key) would copy each entry, definition set included, into a temporary.
    for (const ReachableVariables::value_type &entry : b)
    {
        const ReachableDefinitionsPerVariable &defs = entry.second;

        // operator[] creates an empty set for a component a has not seen, so inserting into it
        // covers both the "new component" and the "merge with existing" case.
        a[entry.first].insert(defs.begin(), defs.end());
    }
}
|
||||
|
||||
#if ENABLE_UNIT_TESTS
|
||||
|
||||
#define UNITY_EXTERNAL_TOOL 1
|
||||
#include "Testing.h" // From Runtime/Testing
|
||||
|
||||
UNIT_TEST_SUITE(HLSLccTests)
|
||||
{
|
||||
// A program with a single block: one full-width write to t0 followed by RET.
TEST(ControlFlowGraph_Build_Simple_Works)
{
    Instruction inst[] =
    {
        // MOV t0.xyzw, I0.xyzw
        Instruction(0, OPCODE_MOV, 0, 0xf, 0xffffffff, 0xf),
        Instruction(1, OPCODE_RET)
    };

    ControlFlowGraph cfg;
    const BasicBlock &root = cfg.Build(inst);

    // The one block spans both instructions and has no neighbours.
    CHECK_EQUAL(&inst[0], root.First());
    CHECK_EQUAL(&inst[1], root.Last());

    CHECK(root.Preceding().empty());
    CHECK(root.Succeeding().empty());

    CHECK_EQUAL(4, root.VarKill().size());

    // All four components of t0 are killed...
    for (uint32_t comp = 0; comp < 4; ++comp)
    {
        CHECK_EQUAL(1, root.VarKill().count(comp));
    }

    // ...and each one is defined by the destination operand of the MOV.
    for (uint32_t comp = 0; comp < 4; ++comp)
    {
        CHECK_EQUAL(&inst[0], root.DEDef().find(comp)->second.begin()->m_Instruction);
        CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(comp)->second.begin()->m_Operand);
    }
}
|
||||
|
||||
// if/else: B0 branches into B1 (then) and B2 (else), which both continue into B3.
TEST(ControlFlowGraph_Build_If_Works)
{
    Instruction inst[] =
    {
        // B0
        // 0: MOV t1.xyzw, i0.xyzw
        Instruction(0, OPCODE_MOV, 1, 0xf, 0xffffffff, 0xf),
        // 1: MUL t0, t1, t1
        Instruction(1, OPCODE_MUL, 0, 0xf, 1, 0xf, 1, 0xf),
        // 2: IF t1.y
        Instruction(2, OPCODE_IF, 1, 2),
        // B1
        // 3: MOV o0, t0
        Instruction(3, OPCODE_MOV, 0xffffffff, 0xf, 0, 0xf),
        // 4: ELSE
        Instruction(4, OPCODE_ELSE),
        // B2
        // 5: MOV o0, t1
        Instruction(5, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf),
        // 6: ENDIF
        Instruction(6, OPCODE_ENDIF),
        // B3
        // 7: NOP
        Instruction(7, OPCODE_NOP),
        // 8: RET
        Instruction(8, OPCODE_RET)
    };

    ControlFlowGraph cfg;
    const BasicBlock &root = cfg.Build(inst);

    CHECK_EQUAL(root.First(), &inst[0]);
    CHECK_EQUAL(root.Last(), &inst[2]);

    CHECK(root.Preceding().empty());

    // B1, B2, B3 in that order, with the instruction index each one starts and ends at.
    const int headIdx[3] = { 3, 5, 7 };
    const int tailIdx[3] = { 4, 6, 8 };
    const BasicBlock *blocks[3] =
    {
        cfg.GetBasicBlockForInstruction(&inst[3]),
        cfg.GetBasicBlockForInstruction(&inst[5]),
        cfg.GetBasicBlockForInstruction(&inst[7])
    };
    const BasicBlock *thenBlock = blocks[0];
    const BasicBlock *elseBlock = blocks[1];
    const BasicBlock *joinBlock = blocks[2];

    for (int i = 0; i < 3; ++i)
    {
        CHECK(blocks[i] != NULL);
    }
    for (int i = 0; i < 3; ++i)
    {
        CHECK_EQUAL(&inst[headIdx[i]], blocks[i]->First());
    }
    for (int i = 0; i < 3; ++i)
    {
        CHECK_EQUAL(&inst[tailIdx[i]], blocks[i]->Last());
    }

    // B0 -> B1 and B0 -> B2, nothing else.
    CHECK_EQUAL(1, root.Succeeding().count(&inst[3]));
    CHECK_EQUAL(1, root.Succeeding().count(&inst[5]));
    CHECK_EQUAL(2, root.Succeeding().size());

    // Both branches are entered from B0 only.
    CHECK_EQUAL(1, thenBlock->Preceding().size());
    CHECK_EQUAL(1, thenBlock->Preceding().count(&inst[0]));

    CHECK_EQUAL(1, elseBlock->Preceding().size());
    CHECK_EQUAL(1, elseBlock->Preceding().count(&inst[0]));

    // The join block is entered from both branches but never directly from B0.
    CHECK_EQUAL(2, joinBlock->Preceding().size());
    CHECK_EQUAL(0, joinBlock->Preceding().count(&inst[0]));
    CHECK_EQUAL(1, joinBlock->Preceding().count(&inst[3]));
    CHECK_EQUAL(1, joinBlock->Preceding().count(&inst[5]));

    // The if block must have upwards-exposed t0 (component indices 0..3).
    for (uint32_t regIdx = 0; regIdx < 4; ++regIdx)
    {
        CHECK_EQUAL(1, thenBlock->UEVar().count(regIdx));
    }

    // The else block must have upwards-exposed t1 (component indices 4..7).
    for (uint32_t regIdx = 4; regIdx < 8; ++regIdx)
    {
        CHECK_EQUAL(1, elseBlock->UEVar().count(regIdx));
    }

    CHECK_EQUAL(8, root.VarKill().size());

    // All components of t0 and t1 are killed in B0.
    for (uint32_t regIdx = 0; regIdx < 8; ++regIdx)
    {
        CHECK_EQUAL(1, root.VarKill().count(regIdx));
    }

    // Expected downwards-exposed definitions:
    //   B0: t0, t1
    //   B1-B3: none
    CHECK_EQUAL(8, root.DEDef().size());
    for (int i = 0; i < 3; ++i)
    {
        CHECK_EQUAL(0, blocks[i]->DEDef().size());
    }

    CHECK(root.DEDef()==root.Reachable());

    // Nothing is redefined after B0, so every block sees exactly B0's definitions.
    for (int i = 0; i < 3; ++i)
    {
        CHECK(root.Reachable()==blocks[i]->Reachable());
    }
}
|
||||
|
||||
// switch with fall-through (B1 -> B2), an unconditional break (B2), a conditional break (B3)
// and a final case that runs into ENDSWITCH (B4). B5 is the code after the switch.
TEST(ControlFlowGraph_Build_SwitchCase_Works)
{
    Instruction inst[] =
    {
        // Start B0
        // i0: MOV t0.x, I0.x
        Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1),
        // i1: MOV t1.xyz, I0.yzw
        Instruction(1, OPCODE_MOV, 1, 7, 0xffffffff, 0xe),
        // i2: MOV t1.w, I0.x
        Instruction(2, OPCODE_MOV, 1, 8, 0xffffffff, 0x1),
        // i3: MOV t2, I0
        Instruction(3, OPCODE_MOV, 2, 0xf, 0xffffffff, 0xf),
        // i4: SWITCH t1.y
        Instruction(4, OPCODE_SWITCH, 1, 2),
        // End B0
        // i5: CASE
        Instruction(5, OPCODE_CASE),
        // i6: DEFAULT
        Instruction(6, OPCODE_DEFAULT),
        // Start B1
        // i7: MOV t1.z, t0.x
        Instruction(7, OPCODE_MOV, 1, 4, 0, 1),
        // i8: CASE
        Instruction(8, OPCODE_CASE),
        // End B1
        // Start B2
        // i9: MOV t1.z, t2.x
        Instruction(9, OPCODE_MOV, 1, 4, 2, 1),
        // i10: BREAK
        Instruction(10, OPCODE_BREAK),
        // End B2
        // i11: CASE
        Instruction(11, OPCODE_CASE),
        // Start B3
        // i12: MOV t1.z, t2.y
        Instruction(12, OPCODE_MOV, 1, 4, 2, 2),
        // i13: BREAKC t0.x
        Instruction(13, OPCODE_BREAKC, 0, 1),
        // End B3
        // i14: CASE
        Instruction(14, OPCODE_CASE),
        // Start B4
        // i15: MOV t1.z, t2.z
        Instruction(15, OPCODE_MOV, 1, 4, 2, 4),
        // i16: ENDSWITCH
        Instruction(16, OPCODE_ENDSWITCH),
        // End B4
        // Start B5
        // i17: MOV o0, t1
        Instruction(17, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf),
        // i18: RET
        Instruction(18, OPCODE_RET)
        // End B5
    };

    ControlFlowGraph cfg;
    const BasicBlock &root = cfg.Build(inst);

    CHECK_EQUAL(&inst[0], root.First());
    CHECK_EQUAL(&inst[4], root.Last());

    const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[7]);
    const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[9]);
    const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[12]);
    const BasicBlock *b4 = cfg.GetBasicBlockForInstruction(&inst[15]);
    const BasicBlock *b5 = cfg.GetBasicBlockForInstruction(&inst[17]);

    CHECK(b1 != NULL);
    CHECK(b2 != NULL);
    CHECK(b3 != NULL);
    CHECK(b4 != NULL);
    CHECK(b5 != NULL);

    // Check instruction ranges
    CHECK_EQUAL(&inst[8], b1->Last());
    CHECK_EQUAL(&inst[10], b2->Last());
    CHECK_EQUAL(&inst[13], b3->Last());
    CHECK_EQUAL(&inst[16], b4->Last());
    CHECK_EQUAL(&inst[18], b5->Last());

    // Nothing before the root, nothing after b5
    CHECK(root.Preceding().empty());
    CHECK(b5->Succeeding().empty());

    // Check that all connections are there and no others.

    // B0->B1, B0->B2, B0->B3, B0->B4
    CHECK_EQUAL(1, root.Succeeding().count(&inst[7]));
    CHECK_EQUAL(1, root.Succeeding().count(&inst[9]));
    CHECK_EQUAL(1, root.Succeeding().count(&inst[12]));
    CHECK_EQUAL(1, root.Succeeding().count(&inst[15]));

    CHECK_EQUAL(4, root.Succeeding().size());

    // B1: falls through into B2, entered from B0 only
    CHECK_EQUAL(1, b1->Succeeding().count(&inst[9]));
    CHECK_EQUAL(1, b1->Succeeding().size());

    CHECK_EQUAL(1, b1->Preceding().count(&inst[0]));
    CHECK_EQUAL(1, b1->Preceding().size());

    // B2: breaks out to B5, entered from B0 and from B1's fall-through
    CHECK_EQUAL(1, b2->Succeeding().count(&inst[17]));
    CHECK_EQUAL(1, b2->Succeeding().size());
    CHECK_EQUAL(1, b2->Preceding().count(&inst[7]));
    CHECK_EQUAL(1, b2->Preceding().count(&inst[0]));
    CHECK_EQUAL(2, b2->Preceding().size());

    // B3: conditional break, so B3->B4 (fall-through) and B3->B5 (break)
    CHECK_EQUAL(1, b3->Succeeding().count(&inst[15]));
    CHECK_EQUAL(1, b3->Succeeding().count(&inst[17]));
    CHECK_EQUAL(2, b3->Succeeding().size());
    CHECK_EQUAL(1, b3->Preceding().count(&inst[0]));
    CHECK_EQUAL(1, b3->Preceding().size());

    // B4: continues to B5, entered from B0 and from B3's fall-through
    CHECK_EQUAL(1, b4->Succeeding().count(&inst[17]));
    CHECK_EQUAL(1, b4->Succeeding().size());
    CHECK_EQUAL(1, b4->Preceding().count(&inst[0]));
    CHECK_EQUAL(1, b4->Preceding().count(&inst[12]));
    CHECK_EQUAL(2, b4->Preceding().size());

    // B5
    CHECK_EQUAL(0, b5->Succeeding().size());
    CHECK_EQUAL(3, b5->Preceding().size()); //b2, b3, b4
    CHECK_EQUAL(1, b5->Preceding().count(&inst[9]));
    CHECK_EQUAL(1, b5->Preceding().count(&inst[12]));
    CHECK_EQUAL(1, b5->Preceding().count(&inst[15]));

    // Verify reachable sets

    CHECK(root.Reachable() == root.DEDef());
    CHECK_EQUAL(9, root.Reachable().size());

    // B5 should have these reachables:
    // t0.x only from b0
    // t1.xy from b0, i1
    // t1.z from b2,i9 + b3,i12 + b4,i15 (the defs from b0 and b1 are killed by b2)
    // t1.w from b0, i2
    // t2.xyzw from b0, i3

    // Read-only view: lookups go through find() so that querying a missing component
    // does not insert an entry into the graph being tested.
    const BasicBlock::ReachableVariables &r = b5->Reachable();
    auto defCount = [&r](uint32_t regIdx) -> size_t
    {
        BasicBlock::ReachableVariables::const_iterator it = r.find(regIdx);
        return (it == r.end()) ? 0 : it->second.size();
    };

    CHECK_EQUAL(9, r.size());

    // t0: only .x is ever written
    CHECK_EQUAL(1, defCount(0));
    CHECK_EQUAL(0, defCount(1));
    CHECK_EQUAL(0, defCount(2));
    CHECK_EQUAL(0, defCount(3));

    BasicBlock::ReachableVariables::const_iterator t0x = r.find(0);
    CHECK(t0x != r.end());
    if (t0x != r.end() && !t0x->second.empty())
    {
        CHECK_EQUAL(&inst[0], t0x->second.begin()->m_Instruction);
    }

    // t1: .z has one definition per path that reaches B5
    CHECK_EQUAL(1, defCount(4));
    CHECK_EQUAL(1, defCount(5));
    CHECK_EQUAL(3, defCount(6));
    CHECK_EQUAL(1, defCount(7));

    BasicBlock::ReachableDefinitionsPerVariable t;
    t.insert(BasicBlock::Definition(&inst[9], &inst[9].asOperands[0]));
    t.insert(BasicBlock::Definition(&inst[12], &inst[12].asOperands[0]));
    t.insert(BasicBlock::Definition(&inst[15], &inst[15].asOperands[0]));

    BasicBlock::ReachableVariables::const_iterator t1z = r.find(6);
    CHECK(t1z != r.end() && t == t1z->second);

    // t2: written once, in B0
    CHECK_EQUAL(1, defCount(8));
    CHECK_EQUAL(1, defCount(9));
    CHECK_EQUAL(1, defCount(10));
    CHECK_EQUAL(1, defCount(11));
}
|
||||
|
||||
// Loop with a conditional break: B0 enters the loop head B1, which either breaks out to B3
// or continues into the loop body B2; B2 jumps back to B1.
TEST(ControlFlowGraph_Build_Loop_Works)
{
    Instruction inst[] =
    {
        // Start B0
        // i0: MOV t0.x, I0.x
        Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1),
        // i1: MOV t1.xy, I0.zw // The .x definition should not make it past the loop, .y should.
        Instruction(1, OPCODE_MOV, 1, 3, 0xffffffff, 0xc),
        // i2: LOOP
        Instruction(2, OPCODE_LOOP, 1, 2),
        // End B0 -> B1
        // Begin B1
        // i3: MOV t1.x, t0.x
        Instruction(3, OPCODE_MOV, 1, 1, 0, 1),
        // i4: BREAKC t0.x
        Instruction(4, OPCODE_BREAKC, 0, 1),
        // End B1 -> B2, B3
        // Begin B2
        // i5: ADD t0.x, t0.y
        Instruction(5, OPCODE_ADD, 0, 1, 0, 2),
        // i6: MOV t1.x, t0.x // This should never show up as definition
        Instruction(6, OPCODE_MOV, 1, 1, 0, 1),
        // i7: ENDLOOP
        Instruction(7, OPCODE_ENDLOOP),
        // End B2 -> B1
        // Start B3
        // i8: MOV O0.x, t1.x
        Instruction(8, OPCODE_MOV, 0xffffffff, 1, 1, 1),
        // i9: RET
        Instruction(9, OPCODE_RET),
        // End B3
    };

    ControlFlowGraph cfg;
    const BasicBlock &root = cfg.Build(inst);

    CHECK_EQUAL(&inst[0], root.First());
    CHECK_EQUAL(&inst[2], root.Last());

    const BasicBlock *loopHead = cfg.GetBasicBlockForInstruction(&inst[3]);   // B1
    const BasicBlock *loopBody = cfg.GetBasicBlockForInstruction(&inst[5]);   // B2
    const BasicBlock *afterLoop = cfg.GetBasicBlockForInstruction(&inst[8]);  // B3

    CHECK(loopHead != NULL);
    CHECK(loopBody != NULL);
    CHECK(afterLoop != NULL);

    // Instruction ranges
    CHECK_EQUAL(&inst[4], loopHead->Last());
    CHECK_EQUAL(&inst[7], loopBody->Last());
    CHECK_EQUAL(&inst[9], afterLoop->Last());

    // Nothing before the root, nothing after B3
    CHECK(root.Preceding().empty());
    CHECK(afterLoop->Succeeding().empty());

    // Edges: every expected connection is present and there are no others.

    // B0 -> B1
    CHECK_EQUAL(1, root.Succeeding().count(&inst[3]));
    CHECK_EQUAL(1, root.Succeeding().size());

    // B1 -> B2 and B1 -> B3
    CHECK_EQUAL(1, loopHead->Succeeding().count(&inst[5]));
    CHECK_EQUAL(1, loopHead->Succeeding().count(&inst[8]));
    CHECK_EQUAL(2, loopHead->Succeeding().size());

    // B1 is entered from B0 and, through the back edge, from B2
    CHECK_EQUAL(1, loopHead->Preceding().count(&inst[0]));
    CHECK_EQUAL(1, loopHead->Preceding().count(&inst[5]));
    CHECK_EQUAL(2, loopHead->Preceding().size());

    // B2 -> B1, entered from B1 only
    CHECK_EQUAL(1, loopBody->Succeeding().count(&inst[3]));
    CHECK_EQUAL(1, loopBody->Succeeding().size());
    CHECK_EQUAL(1, loopBody->Preceding().count(&inst[3]));
    CHECK_EQUAL(1, loopBody->Preceding().size());

    // B3 is entered from B1 only
    CHECK_EQUAL(1, afterLoop->Preceding().count(&inst[3]));
    CHECK_EQUAL(1, afterLoop->Preceding().size());

    // Reachable sets

    // Definition made by the first operand of instruction `idx`.
    auto defAt = [&inst](int idx)
    {
        return BasicBlock::Definition(&inst[idx], &inst[idx].asOperands[0]);
    };

    BasicBlock::ReachableVariables expected;

    // B0 DEDef and Reachable: t0.x from i0, t1.xy from i1
    expected.clear();
    expected[0].insert(defAt(0));
    expected[4].insert(defAt(1));
    expected[5].insert(defAt(1));

    CHECK(root.DEDef() == expected);
    CHECK(root.Reachable() == root.DEDef());

    // B1 DEDef: t1.x from i3
    expected.clear();
    expected[4].insert(defAt(3));
    CHECK(loopHead->DEDef() == expected);

    // B1 Reachable: its own definitions, plus...
    expected = loopHead->DEDef();
    // ...t0.x from i0 and t1.y (but not .x) from i1
    expected[0].insert(defAt(0));
    expected[5].insert(defAt(1));
    // ...and t0.x from i5, but nothing from i6
    expected[0].insert(defAt(5));
    CHECK(loopHead->Reachable() == expected);

    // B2 DEDef: t0.x from i5, t1.x from i6
    expected.clear();
    expected[0].insert(defAt(5));
    expected[4].insert(defAt(6));
    CHECK(loopBody->DEDef() == expected);

    // B2 Reachable: its own definitions plus t1.y from i1
    expected = loopBody->DEDef();
    expected[5].insert(defAt(1));

    CHECK(loopBody->Reachable() == expected);

    // B3 DEDef: nothing
    expected.clear();
    CHECK(afterLoop->DEDef() == expected);

    // B3 Reachable: t0.x from i0, t1.y from i1
    expected[0].insert(defAt(0));
    expected[5].insert(defAt(1));

    // t1.x from i3
    expected[4].insert(defAt(3));

    // t0.x from i5
    expected[0].insert(defAt(5));

    CHECK(afterLoop->Reachable() == expected);
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
121
src/ControlFlowGraphUtils.cpp
Normal file
121
src/ControlFlowGraphUtils.cpp
Normal file
@ -0,0 +1,121 @@
|
||||
|
||||
#include "ControlFlowGraphUtils.h"
|
||||
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/Instruction.h"
|
||||
#include "internal_includes/Operand.h"
|
||||
|
||||
|
||||
|
||||
// Get the next instruction that's not one of CASE, DEFAULT, LOOP, ENDSWITCH
|
||||
const Instruction *HLSLcc::ControlFlow::Utils::GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch /*= 0*/)
|
||||
{
|
||||
const Instruction *inst = psStart;
|
||||
// Skip CASE/DEFAULT/ENDSWITCH/LOOP labels
|
||||
while (inst->eOpcode == OPCODE_CASE || inst->eOpcode == OPCODE_DEFAULT || inst->eOpcode == OPCODE_ENDSWITCH || inst->eOpcode == OPCODE_LOOP)
|
||||
{
|
||||
// We really shouldn't be seeing ENDSWITCH without sawEndSwitch being set (as in, we're expecting it)
|
||||
ASSERT(inst->eOpcode != OPCODE_ENDSWITCH || sawEndSwitch != NULL);
|
||||
if (inst->eOpcode == OPCODE_ENDSWITCH && sawEndSwitch != NULL)
|
||||
*sawEndSwitch = true;
|
||||
inst++;
|
||||
}
|
||||
return inst;
|
||||
|
||||
}
|
||||
|
||||
// For a given flow-control instruction, find the corresponding jump location:
|
||||
// If the input is OPCODE_IF, then find the next same-level ELSE or ENDIF +1
|
||||
// For ELSE, find same level ENDIF + 1
|
||||
// For BREAK/BREAKC, find next ENDLOOP or ENDSWITCH + 1
|
||||
// For SWITCH, find next same-level CASE/DEFAULT (skip multiple consecutive case/default labels) or ENDSWITCH + 1
|
||||
// For ENDLOOP, find previous same-level LOOP + 1
|
||||
// For CASE/DEFAULT, find next same-level CASE/DEFAULT or ENDSWITCH + 1, skip multiple consecutive case/default labels
|
||||
// For CONTINUE/C the previous LOOP + 1
|
||||
// Note that LOOP/ENDSWITCH itself is nothing but a label but it still starts a new basic block.
|
||||
// Note that CASE labels fall through.
|
||||
// Always returns the beginning of the next block, so skip multiple CASE/DEFAULT labels etc.
|
||||
const Instruction * HLSLcc::ControlFlow::Utils::GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch /*= 0*/, bool *needConnectToParent /* = 0*/)
|
||||
{
|
||||
const Instruction *inst = psStart;
|
||||
int depth = 0;
|
||||
OPCODE_TYPE op = psStart->eOpcode;
|
||||
ASSERT(op == OPCODE_IF || op == OPCODE_ELSE || op == OPCODE_BREAK || op == OPCODE_BREAKC
|
||||
|| op == OPCODE_SWITCH || op == OPCODE_CASE || op == OPCODE_DEFAULT
|
||||
|| op == OPCODE_ENDLOOP || op == OPCODE_CONTINUE || op == OPCODE_CONTINUEC);
|
||||
|
||||
switch (op)
|
||||
{
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
case OPCODE_IF:
|
||||
case OPCODE_ELSE:
|
||||
while (1)
|
||||
{
|
||||
inst++;
|
||||
if ((inst->eOpcode == OPCODE_ELSE || inst->eOpcode == OPCODE_ENDIF) && (depth == 0))
|
||||
{
|
||||
return GetNextNonLabelInstruction(inst + 1, sawEndSwitch);
|
||||
}
|
||||
if (inst->eOpcode == OPCODE_IF)
|
||||
depth++;
|
||||
if (inst->eOpcode == OPCODE_ENDIF)
|
||||
depth--;
|
||||
}
|
||||
case OPCODE_BREAK:
|
||||
case OPCODE_BREAKC:
|
||||
while (1)
|
||||
{
|
||||
inst++;
|
||||
if ((inst->eOpcode == OPCODE_ENDLOOP || inst->eOpcode == OPCODE_ENDSWITCH) && (depth == 0))
|
||||
{
|
||||
return GetNextNonLabelInstruction(inst + 1, sawEndSwitch);
|
||||
}
|
||||
if (inst->eOpcode == OPCODE_SWITCH || inst->eOpcode == OPCODE_LOOP)
|
||||
depth++;
|
||||
if (inst->eOpcode == OPCODE_ENDSWITCH || inst->eOpcode == OPCODE_ENDLOOP)
|
||||
depth--;
|
||||
}
|
||||
case OPCODE_CONTINUE:
|
||||
case OPCODE_CONTINUEC:
|
||||
case OPCODE_ENDLOOP:
|
||||
while (1)
|
||||
{
|
||||
inst--;
|
||||
if ((inst->eOpcode == OPCODE_LOOP) && (depth == 0))
|
||||
{
|
||||
return GetNextNonLabelInstruction(inst + 1, sawEndSwitch);
|
||||
}
|
||||
if (inst->eOpcode == OPCODE_LOOP)
|
||||
depth--;
|
||||
if (inst->eOpcode == OPCODE_ENDLOOP)
|
||||
depth++;
|
||||
}
|
||||
case OPCODE_SWITCH:
|
||||
case OPCODE_CASE:
|
||||
case OPCODE_DEFAULT:
|
||||
while (1)
|
||||
{
|
||||
inst++;
|
||||
if ((inst->eOpcode == OPCODE_CASE || inst->eOpcode == OPCODE_DEFAULT || inst->eOpcode == OPCODE_ENDSWITCH) && (depth == 0))
|
||||
{
|
||||
// Note that we'll skip setting sawEndSwitch if inst->eOpcode = OPCODE_ENDSWITCH
|
||||
// so that BasicBlock::Build can distinguish between there being a direct route
|
||||
// from SWITCH->ENDSWITCH (CASE followed directly by ENDSWITCH) and not.
|
||||
|
||||
if (inst->eOpcode == OPCODE_ENDSWITCH && sawEndSwitch != 0)
|
||||
*sawEndSwitch = true;
|
||||
|
||||
return GetNextNonLabelInstruction(inst + 1, needConnectToParent);
|
||||
}
|
||||
if (inst->eOpcode == OPCODE_SWITCH)
|
||||
depth++;
|
||||
if (inst->eOpcode == OPCODE_ENDSWITCH)
|
||||
depth--;
|
||||
}
|
||||
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
769
src/DataTypeAnalysis.cpp
Normal file
769
src/DataTypeAnalysis.cpp
Normal file
@ -0,0 +1,769 @@
|
||||
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/tokens.h"
|
||||
#include "internal_includes/HLSLccToolkit.h"
|
||||
#include "internal_includes/DataTypeAnalysis.h"
|
||||
#include "internal_includes/Shader.h"
|
||||
#include "internal_includes/HLSLCrossCompilerContext.h"
|
||||
#include "internal_includes/Instruction.h"
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
// Helper function to set the vector type of 1 or more components in a vector
|
||||
// If the existing values (in vector we're writing to) are all SVT_VOID, just upgrade the value and we're done
|
||||
// Otherwise, set all the components in the vector that currently are set to that same value OR are now being written to
|
||||
// to the "highest" type value (ordering int->uint->float)
|
||||
static void SetVectorType(std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType, uint32_t regBaseIndex, uint32_t componentMask, SHADER_VARIABLE_TYPE eType, int *psMadeProgress)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
// Expand the mask to include all components that are used, also upgrade type
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (aeTempVecType[regBaseIndex + i] != SVT_VOID)
|
||||
{
|
||||
componentMask |= (1 << i);
|
||||
eType = HLSLcc::SelectHigherType(eType, aeTempVecType[regBaseIndex + i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Now componentMask contains the components we actually need to update and eType may have been changed to something else.
|
||||
// Write the results
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (componentMask & (1 << i))
|
||||
{
|
||||
if (aeTempVecType[regBaseIndex + i] != eType)
|
||||
{
|
||||
aeTempVecType[regBaseIndex + i] = eType;
|
||||
if (psMadeProgress)
|
||||
*psMadeProgress = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Map an operand's minimum-precision qualifier to the matching shader variable type.
// OPERAND_MIN_PRECISION_DEFAULT carries no type information, so eDefault is returned for it.
// An unrecognized qualifier asserts and also yields eDefault.
static SHADER_VARIABLE_TYPE OperandPrecisionToShaderVariableType(OPERAND_MIN_PRECISION prec, SHADER_VARIABLE_TYPE eDefault)
{
    switch (prec)
    {
        case OPERAND_MIN_PRECISION_SINT_16:
            return SVT_INT16;
        case OPERAND_MIN_PRECISION_UINT_16:
            return SVT_UINT16;
        case OPERAND_MIN_PRECISION_FLOAT_2_8:
            return SVT_FLOAT10;
        case OPERAND_MIN_PRECISION_FLOAT_16:
            return SVT_FLOAT16;
        case OPERAND_MIN_PRECISION_DEFAULT:
            return eDefault;
        default:
            ASSERT(0); // Catch this to see what's going on.
            return eDefault;
    }
}
|
||||
|
||||
// Record eType for the components a temp-register operand accesses.
// Operands that are not OPERAND_TYPE_TEMP are ignored. If the operand carries a min-precision
// qualifier, the qualifier's type is used in place of eType.
static void MarkOperandAs(Operand *psOperand, SHADER_VARIABLE_TYPE eType, std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType)
{
    if (psOperand->eType != OPERAND_TYPE_TEMP)
        return;

    // The type table stores 4 consecutive entries per register.
    const uint32_t firstComponentIdx = psOperand->ui32RegisterNumber * 4;
    const uint32_t accessMask = psOperand->GetAccessMask();
    const SHADER_VARIABLE_TYPE effectiveType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, eType);

    SetVectorType(aeTempVecType, firstComponentIdx, accessMask, effectiveType, NULL);
}
|
||||
|
||||
// Apply MarkOperandAs with the same type to every operand of the instruction.
static void MarkAllOperandsAs(Instruction* psInst, SHADER_VARIABLE_TYPE eType, std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType)
{
    uint32_t operandIdx = 0;
    while (operandIdx < psInst->ui32NumOperands)
    {
        MarkOperandAs(&psInst->asOperands[operandIdx], eType, aeTempVecType);
        ++operandIdx;
    }
}
|
||||
|
||||
// Mark scalars from CBs. TODO: Do we need to do the same for vec2/3's as well? There may be swizzles involved which make it vec4 or something else again.
|
||||
static void SetCBOperandComponents(HLSLCrossCompilerContext *psContext, Operand *psOperand)
|
||||
{
|
||||
const ConstantBuffer* psCBuf = NULL;
|
||||
const ShaderVarType* psVarType = NULL;
|
||||
int32_t rebase = 0;
|
||||
bool isArray;
|
||||
|
||||
if (psOperand->eType != OPERAND_TYPE_CONSTANT_BUFFER)
|
||||
return;
|
||||
|
||||
// Ignore selection modes that access more than one component
|
||||
switch (psOperand->eSelMode)
|
||||
{
|
||||
case OPERAND_4_COMPONENT_SELECT_1_MODE:
|
||||
break;
|
||||
case OPERAND_4_COMPONENT_SWIZZLE_MODE:
|
||||
if (!psOperand->IsSwizzleReplicated())
|
||||
return;
|
||||
break;
|
||||
case OPERAND_4_COMPONENT_MASK_MODE:
|
||||
return;
|
||||
}
|
||||
|
||||
psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf);
|
||||
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags);
|
||||
|
||||
if (psVarType->Class == SVC_SCALAR)
|
||||
psOperand->iNumComponents = 1;
|
||||
|
||||
}
|
||||
|
||||
struct SetPartialDataTypes
|
||||
{
|
||||
SetPartialDataTypes(SHADER_VARIABLE_TYPE *_aeTempVec)
|
||||
: m_TempVec(_aeTempVec)
|
||||
{}
|
||||
SHADER_VARIABLE_TYPE *m_TempVec;
|
||||
|
||||
template<typename ItrType> void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const
|
||||
{
|
||||
uint32_t mask = 0;
|
||||
SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec;
|
||||
SHADER_VARIABLE_TYPE newType;
|
||||
uint32_t i, reg;
|
||||
if (psOperand->eType != OPERAND_TYPE_TEMP)
|
||||
return;
|
||||
|
||||
if (ui32OperandType == FEO_FLAG_SUBOPERAND)
|
||||
{
|
||||
// We really shouldn't ever be getting minprecision float indices here
|
||||
ASSERT(psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_16 && psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_2_8);
|
||||
|
||||
mask = psOperand->GetAccessMask();
|
||||
reg = psOperand->ui32RegisterNumber;
|
||||
newType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_INT_AMBIGUOUS);
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (!(mask & (1 << i)))
|
||||
continue;
|
||||
if (aeTempVecType[reg * 4 + i] == SVT_VOID)
|
||||
aeTempVecType[reg * 4 + i] = newType;
|
||||
}
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
if (psOperand->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT)
|
||||
return;
|
||||
|
||||
mask = psOperand->GetAccessMask();
|
||||
reg = psOperand->ui32RegisterNumber;
|
||||
newType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_VOID);
|
||||
ASSERT(newType != SVT_VOID);
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (!(mask & (1 << i)))
|
||||
continue;
|
||||
aeTempVecType[reg * 4 + i] = newType;
|
||||
}
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Write back the temp datatypes into operands. Also mark scalars in constant buffers
|
||||
|
||||
struct WritebackDataTypes
|
||||
{
|
||||
WritebackDataTypes(HLSLCrossCompilerContext *_ctx, SHADER_VARIABLE_TYPE *_aeTempVec)
|
||||
: m_Context(_ctx)
|
||||
, m_TempVec(_aeTempVec)
|
||||
{}
|
||||
HLSLCrossCompilerContext *m_Context;
|
||||
SHADER_VARIABLE_TYPE *m_TempVec;
|
||||
|
||||
template<typename ItrType> void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const
|
||||
{
|
||||
SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec;
|
||||
uint32_t reg, mask, i;
|
||||
SHADER_VARIABLE_TYPE dtype;
|
||||
|
||||
if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER)
|
||||
SetCBOperandComponents(m_Context, psOperand);
|
||||
|
||||
if (psOperand->eType != OPERAND_TYPE_TEMP)
|
||||
return;
|
||||
|
||||
reg = psOperand->ui32RegisterNumber;
|
||||
mask = psOperand->GetAccessMask();
|
||||
dtype = SVT_VOID;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (!(mask & (1 << i)))
|
||||
continue;
|
||||
|
||||
// Check that all components have the same type
|
||||
ASSERT(dtype == SVT_VOID || dtype == aeTempVecType[reg * 4 + i]);
|
||||
|
||||
dtype = aeTempVecType[reg * 4 + i];
|
||||
|
||||
ASSERT(dtype != SVT_VOID);
|
||||
ASSERT(dtype == OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, dtype));
|
||||
|
||||
psOperand->aeDataType[i] = dtype;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
// Infer a data type for every component of every temp register used by 'instructions' and write
// the result back into the instruction operands.
//
// results is cleared and resized to ui32TempCount * 4 entries (one per register component) and
// receives the final per-component types. Nothing else is done when ui32TempCount is 0.
//
// The analysis runs in these steps:
//  1. Seed the table with what is known for a fact (min-precision qualifiers, integer sub-operands).
//  2. Deduce types from the opcodes. Types only ever get promoted (int->uint, int->float),
//     never the other way around.
//  3. Propagate types across MOV/MOVC until nothing changes any more.
//  4. Collapse SVT_FORCED_INT / SVT_INT_AMBIGUOUS to SVT_INT.
//  5. Write the table back into the operands (also marks scalars in constant buffers).
//  6. Propagate boolean types over AND/OR whose sources are both boolean.
void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector<Instruction> & instructions, uint32_t ui32TempCount, std::vector<SHADER_VARIABLE_TYPE> &results)
{
    // Start with void, then move up the chain void->ambiguous int->minprec int/uint->int/uint->minprec float->float
    std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType = results;

    aeTempVecType.clear();
    aeTempVecType.resize(ui32TempCount * 4, SVT_VOID);

    if (ui32TempCount == 0)
        return;

    // Go through the instructions, pick up partial datatypes, because we at least know those for a fact.
    // Also set all suboperands to be integers (they're always used as indices)
    ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, SetPartialDataTypes(&aeTempVecType[0]));

    // First pass, do analysis: deduce the data type based on opcodes, fill out aeTempVecType table.
    // Only ever do int->float promotion (or int->uint), never the other way around.
    // Instructions are addressed by index so an empty instruction list is never dereferenced.
    for (size_t instIdx = 0; instIdx < instructions.size(); ++instIdx)
    {
        Instruction *psInst = &instructions[instIdx];

        if (psInst->ui32NumOperands == 0)
            continue;

#ifdef _DEBUG
        for (uint32_t dbgOp = 0; dbgOp < psInst->ui32NumOperands; dbgOp++)
        {
            if (psInst->asOperands[dbgOp].eType == OPERAND_TYPE_TEMP)
            {
                ASSERT(psInst->asOperands[dbgOp].ui32RegisterNumber < ui32TempCount);
            }
        }
#endif

        switch (psInst->eOpcode)
        {
            // All float-only ops
            case OPCODE_ADD:
            case OPCODE_DERIV_RTX:
            case OPCODE_DERIV_RTY:
            case OPCODE_DIV:
            case OPCODE_DP2:
            case OPCODE_DP3:
            case OPCODE_DP4:
            case OPCODE_EXP:
            case OPCODE_FRC:
            case OPCODE_LOG:
            case OPCODE_MAD:
            case OPCODE_MIN:
            case OPCODE_MAX:
            case OPCODE_MUL:
            case OPCODE_ROUND_NE:
            case OPCODE_ROUND_NI:
            case OPCODE_ROUND_PI:
            case OPCODE_ROUND_Z:
            case OPCODE_RSQ:
            case OPCODE_SAMPLE:
            case OPCODE_SAMPLE_C:
            case OPCODE_SAMPLE_C_LZ:
            case OPCODE_SAMPLE_L:
            case OPCODE_SAMPLE_D:
            case OPCODE_SAMPLE_B:
            case OPCODE_SQRT:
            case OPCODE_SINCOS:
            case OPCODE_LOD:
            case OPCODE_GATHER4:

            case OPCODE_DERIV_RTX_COARSE:
            case OPCODE_DERIV_RTX_FINE:
            case OPCODE_DERIV_RTY_COARSE:
            case OPCODE_DERIV_RTY_FINE:
            case OPCODE_GATHER4_C:
            case OPCODE_GATHER4_PO:
            case OPCODE_GATHER4_PO_C:
            case OPCODE_RCP:
                MarkAllOperandsAs(psInst, SVT_FLOAT, aeTempVecType);
                break;

            // Comparison ops, need to enable possibility for going boolean
            case OPCODE_IEQ:
            case OPCODE_INE:
                MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_INT_AMBIGUOUS, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[2], SVT_INT_AMBIGUOUS, aeTempVecType);
                break;

            case OPCODE_AND:
                MarkOperandAs(&psInst->asOperands[0], SVT_INT_AMBIGUOUS, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType);
                break;

            case OPCODE_IF:
            case OPCODE_BREAKC:
            case OPCODE_CALLC:
            case OPCODE_CONTINUEC:
            case OPCODE_RETC:
                MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
                break;

            case OPCODE_ILT:
            case OPCODE_IGE:
                MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
                break;

            case OPCODE_ULT:
            case OPCODE_UGE:
                MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[2], SVT_UINT, aeTempVecType);
                break;

            // Integer ops that don't care of signedness
            case OPCODE_IADD:
            case OPCODE_INEG:
            case OPCODE_ISHL:
            case OPCODE_NOT:
            case OPCODE_OR:
            case OPCODE_XOR:
            case OPCODE_BUFINFO:
            case OPCODE_COUNTBITS:
            case OPCODE_FIRSTBIT_HI:
            case OPCODE_FIRSTBIT_LO:
            case OPCODE_FIRSTBIT_SHI:
            case OPCODE_BFI:
            case OPCODE_BFREV:
            case OPCODE_ATOMIC_AND:
            case OPCODE_ATOMIC_OR:
            case OPCODE_ATOMIC_XOR:
            case OPCODE_ATOMIC_CMP_STORE:
            case OPCODE_ATOMIC_IADD:
            case OPCODE_IMM_ATOMIC_IADD:
            case OPCODE_IMM_ATOMIC_AND:
            case OPCODE_IMM_ATOMIC_OR:
            case OPCODE_IMM_ATOMIC_XOR:
            case OPCODE_IMM_ATOMIC_EXCH:
            case OPCODE_IMM_ATOMIC_CMP_EXCH:
                MarkAllOperandsAs(psInst, SVT_INT_AMBIGUOUS, aeTempVecType);
                break;

            // Integer ops
            case OPCODE_IMAD:
            case OPCODE_IMAX:
            case OPCODE_IMIN:
            case OPCODE_IMUL:
            case OPCODE_ISHR:
            case OPCODE_IBFE:

            case OPCODE_ATOMIC_IMAX:
            case OPCODE_ATOMIC_IMIN:
            case OPCODE_IMM_ATOMIC_IMAX:
            case OPCODE_IMM_ATOMIC_IMIN:
                MarkAllOperandsAs(psInst, SVT_INT, aeTempVecType);
                break;

            // uint ops
            case OPCODE_UDIV:
            case OPCODE_UMUL:
            case OPCODE_UMAD:
            case OPCODE_UMAX:
            case OPCODE_UMIN:
            case OPCODE_USHR:
            case OPCODE_UADDC:
            case OPCODE_USUBB:
            case OPCODE_ATOMIC_UMAX:
            case OPCODE_ATOMIC_UMIN:
            case OPCODE_IMM_ATOMIC_UMAX:
            case OPCODE_IMM_ATOMIC_UMIN:
            case OPCODE_IMM_ATOMIC_ALLOC:
            case OPCODE_IMM_ATOMIC_CONSUME:
                MarkAllOperandsAs(psInst, SVT_UINT, aeTempVecType);
                break;

            case OPCODE_UBFE:
                MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[3], SVT_UINT, aeTempVecType);
                break;

            // Need special handling
            case OPCODE_FTOI:
            case OPCODE_FTOU:
                MarkOperandAs(&psInst->asOperands[0], psInst->eOpcode == OPCODE_FTOI ? SVT_INT : SVT_UINT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType);
                break;

            case OPCODE_GE:
            case OPCODE_LT:
            case OPCODE_EQ:
            case OPCODE_NE:
                MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[2], SVT_FLOAT, aeTempVecType);
                break;

            case OPCODE_ITOF:
            case OPCODE_UTOF:
                MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], psInst->eOpcode == OPCODE_ITOF ? SVT_INT : SVT_UINT, aeTempVecType);
                break;

            case OPCODE_LD:
            case OPCODE_LD_MS:
                // TODO: Would need to know the sampler return type
                MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType);
                break;

            case OPCODE_MOVC:
                // Operand 1 is the condition. No fall-through into SWAPC: operand 2 of MOVC is a
                // value source, not a condition, and must not be typed as boolean here.
                MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType);
                break;

            case OPCODE_SWAPC:
                // SWAPC has two destinations, so its condition is operand 2.
                MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType);
                break;

            case OPCODE_RESINFO:
            {
                if (psInst->eResInfoReturnType != RESINFO_INSTRUCTION_RETURN_UINT)
                    MarkAllOperandsAs(psInst, SVT_FLOAT, aeTempVecType);
                break;
            }

            case OPCODE_SAMPLE_INFO:
                // TODO decode the _uint flag
                MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
                break;

            case OPCODE_SAMPLE_POS:
                MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
                break;

            case OPCODE_LD_UAV_TYPED:
                // translates to gvec4 loadImage(gimage i, ivec p).
                MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p
                break;

            case OPCODE_STORE_UAV_TYPED:
                // translates to storeImage(gimage i, ivec p, gvec4 data)
                MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p
                MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); // gvec4 data
                break;

            case OPCODE_LD_RAW:
                if (psInst->asOperands[2].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY)
                    MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
                else
                    MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
                break;

            case OPCODE_STORE_RAW:
                if (psInst->asOperands[0].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY)
                    MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
                else
                    MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
                break;

            case OPCODE_LD_STRUCTURED:
                MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
                break;

            case OPCODE_STORE_STRUCTURED:
                MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[3], SVT_INT, aeTempVecType);
                break;

            case OPCODE_F32TOF16:
            case OPCODE_F16TOF32:
                // TODO
                ASSERT(0);
                break;

            // Everything else tells us nothing about temp types here: MOV (handled by the
            // propagation pass below), opcodes without operands (flow-control labels, EMIT/CUT,
            // NOP, RET, SYNC, ...), the DCL_* declarations and hull shader phase markers (custom
            // operand formats), and the not yet handled double-precision, EVAL_*, ABORT and
            // DEBUG_BREAK opcodes (TODO).
            default:
                break;
        }
    }

    // Next go through MOV and MOVC and propagate the data type of whichever parameter we happen to have.
    // Repeat until a full sweep over the instructions changes nothing.
    int madeProgress = 0;
    do
    {
        madeProgress = 0;
        for (size_t instIdx = 0; instIdx < instructions.size(); ++instIdx)
        {
            Instruction *psInst = &instructions[instIdx];
            if (psInst->eOpcode != OPCODE_MOV && psInst->eOpcode != OPCODE_MOVC)
                continue;

            // Figure out the data type
            SHADER_VARIABLE_TYPE dataType = SVT_VOID;
            int foundImmediate = 0;
            for (uint32_t k = 0; k < psInst->ui32NumOperands; k++)
            {
                Operand &operand = psInst->asOperands[k];

                if (psInst->eOpcode == OPCODE_MOVC && k == 1)
                    continue; // Ignore the condition operand, it's always int

                if (operand.eType == OPERAND_TYPE_IMMEDIATE32)
                {
                    foundImmediate = 1;
                    continue; // We don't know the data type of immediates yet, but if this is the only one found, mark as int, it'll get promoted later if needed
                }

                if (operand.eType != OPERAND_TYPE_TEMP)
                {
                    // A non-temp operand has a definite type of its own; it decides.
                    dataType = operand.GetDataType(psContext);
                    break;
                }

                if (operand.eModifier != OPERAND_MODIFIER_NONE)
                {
                    // If any modifiers are used in MOV or MOVC, that automatically is treated as float.
                    dataType = SVT_FLOAT;
                    break;
                }

                const uint32_t mask = operand.GetAccessMask();
                for (uint32_t j = 0; j < 4; j++)
                {
                    if (!(mask & (1 << j)))
                        continue;

                    const SHADER_VARIABLE_TYPE compType = aeTempVecType[operand.ui32RegisterNumber * 4 + j];
                    if (compType != SVT_VOID)
                        dataType = HLSLcc::SelectHigherType(dataType, compType);
                }
            }

            if (foundImmediate && dataType == SVT_VOID)
                dataType = SVT_INT;

            if (dataType == SVT_VOID)
                continue;

            // Found data type, write to all operands
            // First adjust it to not have precision qualifiers in it
            switch (dataType)
            {
                case SVT_FLOAT10:
                case SVT_FLOAT16:
                    dataType = SVT_FLOAT;
                    break;
                case SVT_INT12:
                case SVT_INT16:
                    dataType = SVT_INT;
                    break;
                case SVT_UINT16:
                case SVT_UINT8:
                    dataType = SVT_UINT;
                    break;
                default:
                    break;
            }

            for (uint32_t k = 0; k < psInst->ui32NumOperands; k++)
            {
                Operand &operand = psInst->asOperands[k];

                if (psInst->eOpcode == OPCODE_MOVC && k == 1)
                    continue; // Ignore the condition operand, it's always int

                if (operand.eType != OPERAND_TYPE_TEMP)
                    continue;

                // Operands with an explicit precision keep the type that was seeded for them.
                if (operand.eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT)
                    continue;

                const uint32_t mask = operand.GetAccessMask();
                SetVectorType(aeTempVecType, operand.ui32RegisterNumber * 4, mask, dataType, &madeProgress);
            }
        }
    }
    while (madeProgress != 0);

    // translate forced_int and int_ambiguous back to int
    for (uint32_t compIdx = 0; compIdx < ui32TempCount * 4; compIdx++)
    {
        if (aeTempVecType[compIdx] == SVT_FORCED_INT || aeTempVecType[compIdx] == SVT_INT_AMBIGUOUS)
            aeTempVecType[compIdx] = SVT_INT;
    }

    ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, WritebackDataTypes(psContext, &aeTempVecType[0]));

    // Propagate boolean data types over logical operators
    bool didProgress = false;
    do
    {
        didProgress = false;
        std::for_each(instructions.begin(), instructions.end(), [&didProgress, &psContext, &aeTempVecType](Instruction &inst)
        {
            const bool isLogicalOp = (inst.eOpcode == OPCODE_AND || inst.eOpcode == OPCODE_OR);
            if (!isLogicalOp)
                return;

            // Both sources must already be boolean...
            if (inst.asOperands[1].GetDataType(psContext) != SVT_BOOL || inst.asOperands[2].GetDataType(psContext) != SVT_BOOL)
                return;

            // ...and the destination must be a temp that isn't boolean yet.
            if (inst.asOperands[0].eType != OPERAND_TYPE_TEMP || inst.asOperands[0].GetDataType(psContext) == SVT_BOOL)
                return;

            // Check if all uses see only this define
            bool isStandalone = true;
            std::for_each(inst.m_Uses.begin(), inst.m_Uses.end(), [&isStandalone](Instruction::Use &u)
            {
                if (u.m_Op->m_Defines.size() > 1)
                    isStandalone = false;
            });

            if (!isStandalone)
                return;

            didProgress = true;

            // Change data type of this and all uses
            Operand &dest = inst.asOperands[0];
            dest.aeDataType[0] = dest.aeDataType[1] = dest.aeDataType[2] = dest.aeDataType[3] = SVT_BOOL;
            const uint32_t reg = dest.ui32RegisterNumber;
            aeTempVecType[reg * 4 + 0] = aeTempVecType[reg * 4 + 1] = aeTempVecType[reg * 4 + 2] = aeTempVecType[reg * 4 + 3] = SVT_BOOL;

            std::for_each(inst.m_Uses.begin(), inst.m_Uses.end(), [](Instruction::Use &u)
            {
                u.m_Op->aeDataType[0] = u.m_Op->aeDataType[1] = u.m_Op->aeDataType[2] = u.m_Op->aeDataType[3] = SVT_BOOL;
            });
        });
    }
    while (didProgress);
}
|
2
src/Declaration.cpp
Normal file
2
src/Declaration.cpp
Normal file
@ -0,0 +1,2 @@
|
||||
|
||||
#include "internal_includes/Declaration.h"
|
253
src/HLSLCrossCompilerContext.cpp
Normal file
253
src/HLSLCrossCompilerContext.cpp
Normal file
@ -0,0 +1,253 @@
|
||||
|
||||
#include "internal_includes/HLSLCrossCompilerContext.h"
|
||||
#include "internal_includes/HLSLccToolkit.h"
|
||||
#include "internal_includes/Shader.h"
|
||||
#include "internal_includes/DataTypeAnalysis.h"
|
||||
#include "internal_includes/UseDefineChains.h"
|
||||
#include "internal_includes/Declaration.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/Translator.h"
|
||||
#include "internal_includes/ControlFlowGraph.h"
|
||||
#include <sstream>
|
||||
|
||||
// Runs temp-register analysis for one shader phase: finds the temp declaration, splits temps
// based on use-define chains, assigns data types, and writes the new temp count back into the
// declaration if splitting changed it.
void HLSLCrossCompilerContext::DoDataTypeAnalysis(ShaderPhase *psPhase)
{
    // Reset the bookkeeping; it stays zero/NULL when the phase declares no temps.
    psPhase->psTempDeclaration = NULL;
    psPhase->ui32OrigTemps = 0;
    psPhase->ui32TotalTemps = 0;

    // Pick up the temp count from the first DCL_TEMPS declaration.
    const size_t declCount = psPhase->psDecl.size();
    for (size_t declIdx = 0; declIdx != declCount; ++declIdx)
    {
        if (psPhase->psDecl[declIdx].eOpcode != OPCODE_DCL_TEMPS)
            continue;

        psPhase->psTempDeclaration = &psPhase->psDecl[declIdx];
        psPhase->ui32TotalTemps = psPhase->psDecl[declIdx].value.ui32NumTemps;
        break;
    }

    // Nothing to analyze without temps.
    if (psPhase->ui32TotalTemps == 0)
        return;

    psPhase->ui32OrigTemps = psPhase->ui32TotalTemps;

    // The split table contains the index of the original register each register was split out
    // from, or 0xffffffff.
    // Format: lowest 16 bits: original register. Bits 16-23: rebase (e.g. a value of 1 means
    // .yzw was changed to .xyz). Bits 24-31: component count.
    psPhase->pui32SplitInfo.clear();
    psPhase->pui32SplitInfo.resize(psPhase->ui32TotalTemps * 2, 0xffffffff);

    // Build use-define chains and split temps based on those. The chains are scoped so they
    // are released before the data type pass runs.
    {
        DefineUseChains defUse;
        UseDefineChains useDef;

        BuildUseDefineChains(psPhase->psInst, psPhase->ui32TotalTemps, defUse, useDef, psPhase->GetCFG());

        CalculateStandaloneDefinitions(defUse, psPhase->ui32TotalTemps);

        // Only do sampler precision downgrade on pixel shaders.
        if (psShader->eShaderType == PIXEL_SHADER)
        {
            UpdateSamplerPrecisions(psShader->sInfo, defUse, psPhase->ui32TotalTemps);
        }

        // May change ui32TotalTemps (it is passed by address).
        UDSplitTemps(&psPhase->ui32TotalTemps, defUse, useDef, psPhase->pui32SplitInfo);

        WriteBackUsesAndDefines(defUse);
    }

    HLSLcc::DataTypeAnalysis::SetDataTypes(this, psPhase->psInst, psPhase->ui32TotalTemps, psPhase->peTempTypes);

    // Keep the declaration in sync if the temp count changed.
    const bool tempCountChanged = (psPhase->ui32OrigTemps != psPhase->ui32TotalTemps);
    if (psPhase->psTempDeclaration && tempCountChanged)
    {
        psPhase->psTempDeclaration->value.ui32NumTemps = psPhase->ui32TotalTemps;
    }
}
|
||||
|
||||
void HLSLCrossCompilerContext::ClearDependencyData()
|
||||
{
|
||||
|
||||
switch (psShader->eShaderType)
|
||||
{
|
||||
case PIXEL_SHADER:
|
||||
{
|
||||
psDependencies->ClearCrossDependencyData();
|
||||
}
|
||||
case HULL_SHADER:
|
||||
{
|
||||
psDependencies->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED;
|
||||
psDependencies->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void HLSLCrossCompilerContext::AddIndentation()
|
||||
{
|
||||
int i;
|
||||
bstring glsl = *currentGLSLString;
|
||||
for (i = 0; i < indent; ++i)
|
||||
{
|
||||
bcatcstr(glsl, " ");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const
|
||||
{
|
||||
std::ostringstream oss;
|
||||
const ShaderInfo::InOutSignature* psIn = NULL;
|
||||
int regSpace = psOperand->GetRegisterSpace(this);
|
||||
|
||||
if (iIgnoreRedirect == 0)
|
||||
{
|
||||
if ((regSpace == 0 && psShader->asPhases[currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)
|
||||
||
|
||||
(regSpace == 1 && psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe))
|
||||
{
|
||||
oss << "phase" << currentPhase << "_Input" << regSpace << "_" << psOperand->ui32RegisterNumber;
|
||||
if (piRebase)
|
||||
*piRebase = 0;
|
||||
return oss.str();
|
||||
}
|
||||
}
|
||||
|
||||
if (regSpace == 0)
|
||||
psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn, true);
|
||||
else
|
||||
psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn, true);
|
||||
|
||||
if (psIn && piRebase)
|
||||
*piRebase = psIn->iRebase;
|
||||
|
||||
std::string res = "";
|
||||
bool skipPrefix = false;
|
||||
if (psTranslator->TranslateSystemValue(psOperand, psIn, res, puiIgnoreSwizzle, psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0, true, &skipPrefix))
|
||||
{
|
||||
if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0) && !skipPrefix)
|
||||
return inputPrefix + res;
|
||||
else
|
||||
return res;
|
||||
}
|
||||
|
||||
ASSERT(psIn != NULL);
|
||||
oss << inputPrefix << (regSpace == 1 ? "patch" : "") << psIn->semanticName << psIn->ui32SemanticIndex;
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
|
||||
std::string HLSLCrossCompilerContext::GetDeclaredOutputName(const Operand* psOperand,
|
||||
int* piStream,
|
||||
uint32_t *puiIgnoreSwizzle,
|
||||
int *piRebase,
|
||||
int iIgnoreRedirect) const
|
||||
{
|
||||
std::ostringstream oss;
|
||||
const ShaderInfo::InOutSignature* psOut = NULL;
|
||||
int regSpace = psOperand->GetRegisterSpace(this);
|
||||
|
||||
if (iIgnoreRedirect == 0)
|
||||
{
|
||||
if ((regSpace == 0 && psShader->asPhases[currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)
|
||||
|| (regSpace == 1 && psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe))
|
||||
{
|
||||
oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOperand->ui32RegisterNumber;
|
||||
if (piRebase)
|
||||
*piRebase = 0;
|
||||
return oss.str();
|
||||
}
|
||||
}
|
||||
|
||||
if (regSpace == 0)
|
||||
psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), psShader->ui32CurrentVertexOutputStream, &psOut, true);
|
||||
else
|
||||
psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psOut, true);
|
||||
|
||||
|
||||
if (psOut && piRebase)
|
||||
*piRebase = psOut->iRebase;
|
||||
|
||||
if (psOut && (psOut->isIndexed.find(currentPhase) != psOut->isIndexed.end()))
|
||||
{
|
||||
// Need to route through temp output variable
|
||||
oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOut->indexStart.find(currentPhase)->second;
|
||||
if (!psOperand->m_SubOperands[0].get())
|
||||
{
|
||||
oss << "[" << psOperand->ui32RegisterNumber << "]";
|
||||
}
|
||||
if (piRebase)
|
||||
*piRebase = 0;
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
std::string res = "";
|
||||
if (psTranslator->TranslateSystemValue(psOperand, psOut, res, puiIgnoreSwizzle, psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber], false))
|
||||
{
|
||||
if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0))
|
||||
return outputPrefix + res;
|
||||
else
|
||||
return res;
|
||||
}
|
||||
ASSERT(psOut != NULL);
|
||||
|
||||
oss << outputPrefix << (regSpace == 1 ? "patch" : "") << psOut->semanticName << psOut->ui32SemanticIndex;
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
bool HLSLCrossCompilerContext::OutputNeedsDeclaring(const Operand* psOperand, const int count)
|
||||
{
|
||||
char compMask = (char)psOperand->ui32CompMask;
|
||||
int regSpace = psOperand->GetRegisterSpace(this);
|
||||
uint32_t startIndex = psOperand->ui32RegisterNumber + (psShader->ui32CurrentVertexOutputStream * 1024); // Assume less than 1K input streams
|
||||
ASSERT(psShader->ui32CurrentVertexOutputStream < 4);
|
||||
|
||||
// First check for various builtins, mostly depth-output ones.
|
||||
if (psShader->eShaderType == PIXEL_SHADER)
|
||||
{
|
||||
if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL ||
|
||||
psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH)
|
||||
{
|
||||
// GL doesn't need declaration, Metal does.
|
||||
return psShader->eTargetLanguage == LANG_METAL;
|
||||
}
|
||||
}
|
||||
|
||||
// Needs declaring if any of the components hasn't been already declared
|
||||
if ((compMask & ~psShader->acOutputDeclared[regSpace][startIndex]) != 0)
|
||||
{
|
||||
int offset;
|
||||
const ShaderInfo::InOutSignature* psSignature = NULL;
|
||||
|
||||
if (psOperand->eSpecialName == NAME_UNDEFINED)
|
||||
{
|
||||
// Need to fetch the actual comp mask
|
||||
if (regSpace == 0)
|
||||
psShader->sInfo.GetOutputSignatureFromRegister(
|
||||
psOperand->ui32RegisterNumber,
|
||||
psOperand->ui32CompMask,
|
||||
psShader->ui32CurrentVertexOutputStream,
|
||||
&psSignature);
|
||||
else
|
||||
psShader->sInfo.GetPatchConstantSignatureFromRegister(
|
||||
psOperand->ui32RegisterNumber,
|
||||
psOperand->ui32CompMask,
|
||||
&psSignature);
|
||||
|
||||
compMask = (char)psSignature->ui32Mask;
|
||||
}
|
||||
for (offset = 0; offset < count; offset++)
|
||||
{
|
||||
psShader->acOutputDeclared[regSpace][startIndex + offset] |= compMask;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
212
src/HLSLcc.cpp
Normal file
212
src/HLSLcc.cpp
Normal file
@ -0,0 +1,212 @@
|
||||
|
||||
#include "hlslcc.h"
|
||||
|
||||
#include <memory>
|
||||
#include "internal_includes/HLSLCrossCompilerContext.h"
|
||||
#include "internal_includes/toGLSL.h"
|
||||
#include "internal_includes/toMetal.h"
|
||||
#include "internal_includes/Shader.h"
|
||||
#include "internal_includes/decode.h"
|
||||
|
||||
|
||||
// Fallback definitions of the OpenGL shader stage enums, used only when nothing included
// earlier has defined them already.
#ifndef GL_VERTEX_SHADER_ARB
#define GL_VERTEX_SHADER_ARB 0x8B31
#endif
#ifndef GL_FRAGMENT_SHADER_ARB
#define GL_FRAGMENT_SHADER_ARB 0x8B30
#endif
#ifndef GL_GEOMETRY_SHADER
#define GL_GEOMETRY_SHADER 0x8DD9
#endif
#ifndef GL_TESS_EVALUATION_SHADER
#define GL_TESS_EVALUATION_SHADER 0x8E87
#endif
#ifndef GL_TESS_CONTROL_SHADER
#define GL_TESS_CONTROL_SHADER 0x8E88
#endif
#ifndef GL_COMPUTE_SHADER
#define GL_COMPUTE_SHADER 0x91B9
#endif
|
||||
|
||||
|
||||
// Translates a DirectX bytecode blob held in memory into the requested target language.
//   shader              - pointer to the bytecode blob.
//   flags               - HLSLCC_FLAG_* translation flags.
//   language            - requested output language; the GLSL translator may adjust it, and
//                         the final value is reported in result->GLSLLanguage.
//   extensions          - extension info handed to the GLSL translator.
//   dependencies        - cross-stage dependency data stored on the translation context.
//   samplerPrecisions   - sampler precisions added to the shader info before translation.
//   reflectionCallbacks - reflection interface handed to the translation context.
//   result              - receives source code, reflection data, shader type and language.
// Returns 1 on success, 0 when decoding or translation fails or the stage is unsupported.
HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader,
    unsigned int flags,
    GLLang language,
    const GlExtensions *extensions,
    GLSLCrossDependencyData* dependencies,
    HLSLccSamplerPrecisionInfo& samplerPrecisions,
    HLSLccReflection& reflectionCallbacks,
    GLSLShader* result)
{
    int GLSLShaderType = GL_FRAGMENT_SHADER_ARB;
    int success = 0;

    uint32_t* tokens = (uint32_t*)shader;

    // std::unique_ptr instead of std::auto_ptr: auto_ptr is deprecated in C++11 and removed
    // in C++17, while unique_ptr offers the same scoped ownership.
    std::unique_ptr<Shader> psShader(DecodeDXBC(tokens, flags));

    if (psShader.get())
    {
        HLSLCrossCompilerContext sContext(reflectionCallbacks);

        // Add shader precisions from the list
        psShader->sInfo.AddSamplerPrecisions(samplerPrecisions);

        if (psShader->ui32MajorVersion <= 3)
        {
            flags &= ~HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS;
        }

        sContext.psShader = psShader.get();
        sContext.flags = flags;
        sContext.psDependencies = dependencies;

        for (size_t i = 0; i < psShader->asPhases.size(); ++i)
        {
            psShader->asPhases[i].hasPostShaderCode = 0;
        }

        // Destroys the output string and the per-phase strings; shared by the failure and
        // success paths below.
        auto destroyTranslationStrings = [&sContext, &psShader]()
        {
            bdestroy(sContext.glsl);
            for (size_t i = 0; i < psShader->asPhases.size(); ++i)
            {
                bdestroy(psShader->asPhases[i].postShaderCode);
                bdestroy(psShader->asPhases[i].earlyMain);
            }
        };

        if (language == LANG_METAL)
        {
            // Tessellation or geometry shaders are not supported
            if (psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER || psShader->eShaderType == GEOMETRY_SHADER)
            {
                result->sourceCode = "";
                return 0;
            }

            ToMetal translator(&sContext);
            if (!translator.Translate())
            {
                destroyTranslationStrings();
                return 0;
            }
        }
        else
        {
            ToGLSL translator(&sContext);
            language = translator.SetLanguage(language);
            translator.SetExtensions(extensions);
            if (!translator.Translate())
            {
                destroyTranslationStrings();
                return 0;
            }
        }

        // Map the shader stage to the GL enum reported to the caller. Stages without a case
        // keep the GL_FRAGMENT_SHADER_ARB default.
        switch (psShader->eShaderType)
        {
            case VERTEX_SHADER:
                GLSLShaderType = GL_VERTEX_SHADER_ARB;
                break;
            case GEOMETRY_SHADER:
                GLSLShaderType = GL_GEOMETRY_SHADER;
                break;
            case DOMAIN_SHADER:
                GLSLShaderType = GL_TESS_EVALUATION_SHADER;
                break;
            case HULL_SHADER:
                GLSLShaderType = GL_TESS_CONTROL_SHADER;
                break;
            case COMPUTE_SHADER:
                GLSLShaderType = GL_COMPUTE_SHADER;
                break;
            default:
                break;
        }

        // Copy the generated source into the result before the bstring is destroyed.
        char* glslcstr = bstr2cstr(sContext.glsl, '\0');
        result->sourceCode = glslcstr;
        bcstrfree(glslcstr);

        destroyTranslationStrings();

        result->reflection = psShader->sInfo;
        result->textureSamplers = psShader->textureSamplers;

        success = 1;
    }

    /* Fill in the result struct */
    result->shaderType = GLSLShaderType;
    result->GLSLLanguage = language;

    return success;
}
|
||||
|
||||
// Reads a DirectX bytecode file into memory and translates it with TranslateHLSLFromMem.
// All parameters other than filename are forwarded unchanged.
// Returns 0 when the file cannot be opened or its size cannot be determined; otherwise
// returns the result of TranslateHLSLFromMem.
HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename,
    unsigned int flags,
    GLLang language,
    const GlExtensions *extensions,
    GLSLCrossDependencyData* dependencies,
    HLSLccSamplerPrecisionInfo& samplerPrecisions,
    HLSLccReflection& reflectionCallbacks,
    GLSLShader* result)
{
    FILE* shaderFile = fopen(filename, "rb");
    if (!shaderFile)
    {
        return 0;
    }

    // Determine the file size. ftell returns long, and -1 on failure.
    fseek(shaderFile, 0, SEEK_END);
    const long length = ftell(shaderFile);
    fseek(shaderFile, 0, SEEK_SET);

    if (length < 0)
    {
        fclose(shaderFile);
        return 0;
    }

    // The buffer must actually contain length + 1 elements: reserve() only allocates
    // capacity, so indexing a reserved-but-empty vector is undefined behaviour.
    std::vector<char> shader(static_cast<size_t>(length) + 1, '\0');

    const size_t readLength = fread(&shader[0], 1, static_cast<size_t>(length), shaderFile);
    fclose(shaderFile);

    // Terminate right after the bytes actually read (readLength <= length).
    shader[readLength] = '\0';

    return TranslateHLSLFromMem(&shader[0], flags, language, extensions, dependencies, samplerPrecisions, reflectionCallbacks, result);
}
|
||||
|
482
src/HLSLccToolkit.cpp
Normal file
482
src/HLSLccToolkit.cpp
Normal file
@ -0,0 +1,482 @@
|
||||
|
||||
#include "internal_includes/HLSLccToolkit.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/toGLSLOperand.h"
|
||||
#include "internal_includes/HLSLCrossCompilerContext.h"
|
||||
#include "internal_includes/Shader.h"
|
||||
#include <sstream>
|
||||
#include <cmath>
|
||||
|
||||
namespace HLSLcc
|
||||
{
|
||||
// Returns the number of bits set in a, for any 32-bit value.
uint32_t GetNumberBitsSet(uint32_t a)
{
    // Parallel bit count: sum adjacent bits into 2-bit fields, then 4-bit fields, then
    // bytes, and finally add the four byte counts together with a multiply.
    a = a - ((a >> 1) & 0x55555555u);
    a = (a & 0x33333333u) + ((a >> 2) & 0x33333333u);
    a = (a + (a >> 4)) & 0x0F0F0F0Fu;
    return (a * 0x01010101u) >> 24;
}
|
||||
|
||||
uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType)
|
||||
{
|
||||
if (eType == SVT_FLOAT16)
|
||||
{
|
||||
return TO_FLAG_FORCE_HALF;
|
||||
}
|
||||
if (eType == SVT_UINT || eType == SVT_UINT16)
|
||||
{
|
||||
return TO_FLAG_UNSIGNED_INTEGER;
|
||||
}
|
||||
else if (eType == SVT_INT || eType == SVT_INT16 || eType == SVT_INT12)
|
||||
{
|
||||
return TO_FLAG_INTEGER;
|
||||
}
|
||||
else if (eType == SVT_BOOL)
|
||||
{
|
||||
return TO_FLAG_BOOL;
|
||||
}
|
||||
else
|
||||
{
|
||||
return TO_FLAG_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
// Maps a set of type flags to a shader variable type. When several flags are set the first
// match wins, in the order: half, int, uint, bool. No matching flag yields SVT_FLOAT.
SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags)
{
    SHADER_VARIABLE_TYPE svt = SVT_FLOAT;

    if (typeflags & TO_FLAG_FORCE_HALF)
        svt = SVT_FLOAT16;
    else if (typeflags & (TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT))
        svt = SVT_INT;
    else if (typeflags & (TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT))
        svt = SVT_UINT;
    else if (typeflags & TO_FLAG_BOOL)
        svt = SVT_BOOL;

    return svt;
}
|
||||
|
||||
const char * GetConstructorForTypeGLSL(const SHADER_VARIABLE_TYPE eType,
|
||||
const int components, bool useGLSLPrecision)
|
||||
{
|
||||
static const char * const uintTypes[] = { " ", "uint", "uvec2", "uvec3", "uvec4" };
|
||||
static const char * const uint16Types[] = { " ", "mediump uint", "mediump uvec2", "mediump uvec3", "mediump uvec4" };
|
||||
static const char * const intTypes[] = { " ", "int", "ivec2", "ivec3", "ivec4" };
|
||||
static const char * const int16Types[] = { " ", "mediump int", "mediump ivec2", "mediump ivec3", "mediump ivec4" };
|
||||
static const char * const int12Types[] = { " ", "lowp int", "lowp ivec2", "lowp ivec3", "lowp ivec4" };
|
||||
static const char * const floatTypes[] = { " ", "float", "vec2", "vec3", "vec4" };
|
||||
static const char * const float16Types[] = { " ", "mediump float", "mediump vec2", "mediump vec3", "mediump vec4" };
|
||||
static const char * const float10Types[] = { " ", "lowp float", "lowp vec2", "lowp vec3", "lowp vec4" };
|
||||
static const char * const boolTypes[] = { " ", "bool", "bvec2", "bvec3", "bvec4" };
|
||||
|
||||
ASSERT(components >= 1 && components <= 4);
|
||||
|
||||
switch (eType)
|
||||
{
|
||||
case SVT_UINT:
|
||||
return uintTypes[components];
|
||||
case SVT_UINT16:
|
||||
return useGLSLPrecision ? uint16Types[components] : uintTypes[components];
|
||||
case SVT_INT:
|
||||
return intTypes[components];
|
||||
case SVT_INT16:
|
||||
return useGLSLPrecision ? int16Types[components] : intTypes[components];
|
||||
case SVT_INT12:
|
||||
return useGLSLPrecision ? int12Types[components] : intTypes[components];
|
||||
case SVT_FLOAT:
|
||||
return floatTypes[components];
|
||||
case SVT_FLOAT16:
|
||||
return useGLSLPrecision ? float16Types[components] : floatTypes[components];
|
||||
case SVT_FLOAT10:
|
||||
return useGLSLPrecision ? float10Types[components] : floatTypes[components];
|
||||
case SVT_BOOL:
|
||||
return boolTypes[components];
|
||||
default:
|
||||
ASSERT(0);
|
||||
return " ";
|
||||
}
|
||||
}
|
||||
|
||||
const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType,
|
||||
const int components)
|
||||
{
|
||||
static const char * const uintTypes[] = { " ", "uint", "uint2", "uint3", "uint4" };
|
||||
static const char * const ushortTypes[] = { " ", "ushort", "ushort2", "ushort3", "ushort4" };
|
||||
static const char * const intTypes[] = { " ", "int", "int2", "int3", "int4" };
|
||||
static const char * const shortTypes[] = { " ", "short", "short2", "short3", "short4" };
|
||||
static const char * const floatTypes[] = { " ", "float", "float2", "float3", "float4" };
|
||||
static const char * const halfTypes[] = { " ", "half", "half2", "half3", "half4" };
|
||||
static const char * const boolTypes[] = { " ", "bool", "bool2", "bool3", "bool4" };
|
||||
|
||||
ASSERT(components >= 1 && components <= 4);
|
||||
|
||||
switch (eType)
|
||||
{
|
||||
case SVT_UINT:
|
||||
return uintTypes[components];
|
||||
case SVT_UINT16:
|
||||
return ushortTypes[components];
|
||||
case SVT_INT:
|
||||
return intTypes[components];
|
||||
case SVT_INT16:
|
||||
case SVT_INT12:
|
||||
return shortTypes[components];
|
||||
case SVT_FLOAT:
|
||||
return floatTypes[components];
|
||||
case SVT_FLOAT16:
|
||||
case SVT_FLOAT10:
|
||||
return halfTypes[components];
|
||||
case SVT_BOOL:
|
||||
return boolTypes[components];
|
||||
default:
|
||||
ASSERT(0);
|
||||
return " ";
|
||||
}
|
||||
}
|
||||
|
||||
const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision /* = true*/)
|
||||
{
|
||||
if (psContext->psShader->eTargetLanguage == LANG_METAL)
|
||||
return GetConstructorForTypeMetal(eType, components);
|
||||
else
|
||||
return GetConstructorForTypeGLSL(eType, components, useGLSLPrecision);
|
||||
}
|
||||
|
||||
std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows)
|
||||
{
|
||||
std::string result;
|
||||
std::ostringstream oss;
|
||||
if (psContext->psShader->eTargetLanguage == LANG_METAL)
|
||||
{
|
||||
switch (eBaseType)
|
||||
{
|
||||
case SVT_FLOAT:
|
||||
oss << "float" << columns << "x" << rows;
|
||||
break;
|
||||
case SVT_FLOAT16:
|
||||
case SVT_FLOAT10:
|
||||
oss << "half" << columns << "x" << rows;
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (eBaseType)
|
||||
{
|
||||
case SVT_FLOAT:
|
||||
oss << "mat" << columns << "x" << rows;
|
||||
break;
|
||||
case SVT_FLOAT16:
|
||||
oss << "mediump mat" << columns << "x" << rows;
|
||||
break;
|
||||
case SVT_FLOAT10:
|
||||
oss << "lowp mat" << columns << "x" << rows;
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
result = oss.str();
|
||||
return result;
|
||||
}
|
||||
|
||||
// Appends a swizzle selecting the first `count` components to dest (".x", ".xy", ".xyz").
// Nothing is appended for a count of 0 or exactly 4; counts above 4 append ".xyzw".
void AddSwizzleUsingElementCount(bstring dest, uint32_t count)
{
    static const char * const componentNames[] = { "x", "y", "z", "w" };

    if (count == 4 || count == 0)
        return;

    bcatcstr(dest, ".");
    for (uint32_t comp = 0; comp < count && comp < 4; ++comp)
        bcatcstr(dest, componentNames[comp]);
}
|
||||
|
||||
// Calculate the bits set in mask
|
||||
int WriteMaskToComponentCount(uint32_t writeMask)
|
||||
{
|
||||
// In HLSL bytecode writemask 0 also means everything
|
||||
if (writeMask == 0)
|
||||
return 4;
|
||||
|
||||
return (int)GetNumberBitsSet(writeMask);
|
||||
}
|
||||
|
||||
// Translates a component count into a bitmask with that many low bits set:
// 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15.
uint32_t BuildComponentMaskFromElementCount(int count)
{
    const int firstBitAboveMask = 1 << count;
    return firstBitAboveMask - 1;
}
|
||||
|
||||
// Returns true if we can do direct assignment between types (mostly for mediump<->highp floats etc)
|
||||
bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src)
|
||||
{
|
||||
if (src == dest)
|
||||
return true;
|
||||
|
||||
if ((dest == SVT_FLOAT || dest == SVT_FLOAT10 || dest == SVT_FLOAT16) &&
|
||||
(src == SVT_FLOAT || src == SVT_FLOAT10 || src == SVT_FLOAT16))
|
||||
return true;
|
||||
|
||||
if ((dest == SVT_INT || dest == SVT_INT12 || dest == SVT_INT16) &&
|
||||
(src == SVT_INT || src == SVT_INT12 || src == SVT_INT16))
|
||||
return true;
|
||||
|
||||
if ((dest == SVT_UINT || dest == SVT_UINT16) &&
|
||||
(src == SVT_UINT || src == SVT_UINT16))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType)
|
||||
{
|
||||
if (eType == RETURN_TYPE_SINT)
|
||||
{
|
||||
return TO_FLAG_INTEGER;
|
||||
}
|
||||
else if (eType == RETURN_TYPE_UINT)
|
||||
{
|
||||
return TO_FLAG_UNSIGNED_INTEGER;
|
||||
}
|
||||
else
|
||||
{
|
||||
return TO_FLAG_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
// Maps a resource return type plus reflected precision to a shader variable type.
// SINT and UINT select the int/uint families and every other return type selects float;
// LOWP and MEDIUMP select the reduced-precision member and any other precision the full one.
SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec)
{
    const bool isLowp = (ePrec == REFLECT_RESOURCE_PRECISION_LOWP);
    const bool isMediump = (ePrec == REFLECT_RESOURCE_PRECISION_MEDIUMP);

    if (eType == RETURN_TYPE_SINT)
        return isLowp ? SVT_INT12 : (isMediump ? SVT_INT16 : SVT_INT);

    if (eType == RETURN_TYPE_UINT)
        return isLowp ? SVT_UINT8 : (isMediump ? SVT_UINT16 : SVT_UINT);

    return isLowp ? SVT_FLOAT10 : (isMediump ? SVT_FLOAT16 : SVT_FLOAT);
}
|
||||
|
||||
|
||||
uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount)
|
||||
{
|
||||
return TO_AUTO_EXPAND_TO_VEC2 << (elemCount - 2);
|
||||
}
|
||||
|
||||
// Returns true if the operation is commutative
|
||||
bool IsOperationCommutative(int eOpCode)
|
||||
{
|
||||
switch ((OPCODE_TYPE)eOpCode)
|
||||
{
|
||||
case OPCODE_DADD:
|
||||
case OPCODE_IADD:
|
||||
case OPCODE_ADD:
|
||||
case OPCODE_MUL:
|
||||
case OPCODE_IMUL:
|
||||
case OPCODE_OR:
|
||||
case OPCODE_AND:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
};
|
||||
}
|
||||
|
||||
// Returns true if operands are identical, only cares about temp registers currently.
|
||||
bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB)
|
||||
{
|
||||
if (!psA || !psB)
|
||||
return 0;
|
||||
|
||||
if (psA->eType != OPERAND_TYPE_TEMP || psB->eType != OPERAND_TYPE_TEMP)
|
||||
return 0;
|
||||
|
||||
if (psA->eModifier != psB->eModifier)
|
||||
return 0;
|
||||
|
||||
if (psA->iNumComponents != psB->iNumComponents)
|
||||
return 0;
|
||||
|
||||
if (psA->ui32RegisterNumber != psB->ui32RegisterNumber)
|
||||
return 0;
|
||||
|
||||
if (psA->eSelMode != psB->eSelMode)
|
||||
return 0;
|
||||
|
||||
if (psA->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && psA->ui32CompMask != psB->ui32CompMask)
|
||||
return 0;
|
||||
|
||||
if (psA->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE && psA->aui32Swizzle[0] != psB->aui32Swizzle[0])
|
||||
return 0;
|
||||
|
||||
if (psA->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE && std::equal(&psA->aui32Swizzle[0], &psA->aui32Swizzle[4], &psB->aui32Swizzle[0]))
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
bool IsAddOneInstruction(const Instruction *psInst)
|
||||
{
|
||||
if (psInst->eOpcode != OPCODE_IADD)
|
||||
return false;
|
||||
if (psInst->asOperands[0].eType != OPERAND_TYPE_TEMP)
|
||||
return false;
|
||||
|
||||
if (psInst->asOperands[1].eType == OPERAND_TYPE_TEMP)
|
||||
{
|
||||
if (psInst->asOperands[1].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber)
|
||||
return false;
|
||||
if (psInst->asOperands[2].eType != OPERAND_TYPE_IMMEDIATE32)
|
||||
return false;
|
||||
|
||||
if (*(int *)&psInst->asOperands[2].afImmediates[0] != 1)
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (psInst->asOperands[1].eType != OPERAND_TYPE_IMMEDIATE32)
|
||||
return false;
|
||||
if (psInst->asOperands[2].eType != OPERAND_TYPE_TEMP)
|
||||
return false;
|
||||
|
||||
if (psInst->asOperands[2].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber)
|
||||
return false;
|
||||
|
||||
if (*(int *)&psInst->asOperands[1].afImmediates[0] != 1)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim)
|
||||
{
|
||||
switch ((RESOURCE_DIMENSION)eResDim)
|
||||
{
|
||||
case RESOURCE_DIMENSION_TEXTURE1D:
|
||||
return 1;
|
||||
case RESOURCE_DIMENSION_TEXTURE2D:
|
||||
case RESOURCE_DIMENSION_TEXTURE1DARRAY:
|
||||
case RESOURCE_DIMENSION_TEXTURECUBE:
|
||||
return 2;
|
||||
case RESOURCE_DIMENSION_TEXTURE3D:
|
||||
case RESOURCE_DIMENSION_TEXTURE2DARRAY:
|
||||
case RESOURCE_DIMENSION_TEXTURECUBEARRAY:
|
||||
return 3;
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Returns the "more important" type of a and b, currently int < uint < float
|
||||
SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b)
|
||||
{
|
||||
#define DO_CHECK(type) if( a == type || b == type ) return type
|
||||
|
||||
// Priority ordering
|
||||
DO_CHECK(SVT_FLOAT16);
|
||||
DO_CHECK(SVT_FLOAT10);
|
||||
DO_CHECK(SVT_UINT16);
|
||||
DO_CHECK(SVT_UINT8);
|
||||
DO_CHECK(SVT_INT16);
|
||||
DO_CHECK(SVT_INT12);
|
||||
DO_CHECK(SVT_FORCED_INT);
|
||||
DO_CHECK(SVT_FLOAT);
|
||||
DO_CHECK(SVT_UINT);
|
||||
DO_CHECK(SVT_INT);
|
||||
DO_CHECK(SVT_INT_AMBIGUOUS);
|
||||
|
||||
#undef DO_CHECK
|
||||
// After these just rely on ordering.
|
||||
return a > b ? a : b;
|
||||
}
|
||||
|
||||
// Returns true if a direct constructor can convert src->dest
|
||||
bool CanDoDirectCast(SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest)
|
||||
{
|
||||
// uint<->int<->bool conversions possible
|
||||
if ((src == SVT_INT || src == SVT_UINT || src == SVT_BOOL || src == SVT_INT12 || src == SVT_INT16 || src == SVT_UINT16) &&
|
||||
(dest == SVT_INT || dest == SVT_UINT || dest == SVT_BOOL || dest == SVT_INT12 || dest == SVT_INT16 || dest == SVT_UINT16))
|
||||
return true;
|
||||
|
||||
// float<->double possible
|
||||
if ((src == SVT_FLOAT || src == SVT_DOUBLE || src == SVT_FLOAT16 || src == SVT_FLOAT10) &&
|
||||
(dest == SVT_FLOAT || dest == SVT_DOUBLE || dest == SVT_FLOAT16 || dest == SVT_FLOAT10))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define fpcheck(x) (_isnan(x) || !_finite(x))
|
||||
#else
|
||||
#define fpcheck(x) (std::isnan(x) || std::isinf(x))
|
||||
#endif
|
||||
|
||||
// Helper function to print floats with full precision
|
||||
void PrintFloat(bstring b, float f)
|
||||
{
|
||||
bstring temp;
|
||||
int ePos;
|
||||
int pointPos;
|
||||
|
||||
temp = bformat("%.9g", f);
|
||||
ePos = bstrchrp(temp, 'e', 0);
|
||||
pointPos = bstrchrp(temp, '.', 0);
|
||||
|
||||
bconcat(b, temp);
|
||||
bdestroy(temp);
|
||||
|
||||
if (ePos < 0 && pointPos < 0 && !fpcheck(f))
|
||||
bcatcstr(b, ".0");
|
||||
}
|
||||
};
|
||||
|
10
src/HLSLccTypes.natvis
Normal file
10
src/HLSLccTypes.natvis
Normal file
@ -0,0 +1,10 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
|
||||
<Type Name="Instruction">
|
||||
<DisplayString>{{ id={id} op={eOpcode} o0={asOperands[0]}, o1={asOperands[1]}}}</DisplayString>
|
||||
</Type>
|
||||
<Type Name="Operand">
|
||||
<DisplayString>{{ type={eType}, reg={ui32RegisterNumber} }}</DisplayString>
|
||||
</Type>
|
||||
|
||||
</AutoVisualizer>
|
351
src/Instruction.cpp
Normal file
351
src/Instruction.cpp
Normal file
@ -0,0 +1,351 @@
|
||||
|
||||
#include "internal_includes/Instruction.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include "include/ShaderInfo.h"
|
||||
|
||||
// Returns the result swizzle operand for an instruction, or NULL if all src operands have swizzles
|
||||
static Operand *GetSrcSwizzleOperand(Instruction *psInst)
|
||||
{
|
||||
switch (psInst->eOpcode)
|
||||
{
|
||||
case OPCODE_DP2:
|
||||
case OPCODE_DP3:
|
||||
case OPCODE_DP4:
|
||||
case OPCODE_NOP:
|
||||
case OPCODE_SWAPC:
|
||||
case OPCODE_SAMPLE_C:
|
||||
case OPCODE_SAMPLE_C_LZ:
|
||||
ASSERT(0);
|
||||
return NULL;
|
||||
|
||||
// Normal arithmetics, all srcs have swizzles
|
||||
case OPCODE_ADD:
|
||||
case OPCODE_AND:
|
||||
case OPCODE_DERIV_RTX:
|
||||
case OPCODE_DERIV_RTX_COARSE:
|
||||
case OPCODE_DERIV_RTX_FINE:
|
||||
case OPCODE_DERIV_RTY:
|
||||
case OPCODE_DERIV_RTY_COARSE:
|
||||
case OPCODE_DERIV_RTY_FINE:
|
||||
case OPCODE_DIV:
|
||||
case OPCODE_EQ:
|
||||
case OPCODE_EXP:
|
||||
case OPCODE_FRC:
|
||||
case OPCODE_FTOI:
|
||||
case OPCODE_FTOU:
|
||||
case OPCODE_GE:
|
||||
case OPCODE_IADD:
|
||||
case OPCODE_IEQ:
|
||||
case OPCODE_IGE:
|
||||
case OPCODE_ILT:
|
||||
case OPCODE_IMAD:
|
||||
case OPCODE_IMAX:
|
||||
case OPCODE_IMIN:
|
||||
case OPCODE_IMUL:
|
||||
case OPCODE_INE:
|
||||
case OPCODE_INEG:
|
||||
case OPCODE_ITOF:
|
||||
case OPCODE_LOG:
|
||||
case OPCODE_LT:
|
||||
case OPCODE_MAD:
|
||||
case OPCODE_MAX:
|
||||
case OPCODE_MIN:
|
||||
case OPCODE_MOV:
|
||||
case OPCODE_MUL:
|
||||
case OPCODE_NE:
|
||||
case OPCODE_NOT:
|
||||
case OPCODE_OR:
|
||||
case OPCODE_ROUND_NE:
|
||||
case OPCODE_ROUND_NI:
|
||||
case OPCODE_ROUND_PI:
|
||||
case OPCODE_ROUND_Z:
|
||||
case OPCODE_RSQ:
|
||||
case OPCODE_SINCOS:
|
||||
case OPCODE_SQRT:
|
||||
case OPCODE_UDIV:
|
||||
case OPCODE_UGE:
|
||||
case OPCODE_ULT:
|
||||
case OPCODE_UMAD:
|
||||
case OPCODE_UMAX:
|
||||
case OPCODE_UMIN:
|
||||
case OPCODE_UMUL:
|
||||
case OPCODE_UTOF:
|
||||
case OPCODE_XOR:
|
||||
|
||||
case OPCODE_BFI:
|
||||
case OPCODE_BFREV:
|
||||
case OPCODE_COUNTBITS:
|
||||
case OPCODE_DADD:
|
||||
case OPCODE_DDIV:
|
||||
case OPCODE_DEQ:
|
||||
case OPCODE_DFMA:
|
||||
case OPCODE_DGE:
|
||||
case OPCODE_DLT:
|
||||
case OPCODE_DMAX:
|
||||
case OPCODE_DMIN:
|
||||
case OPCODE_DMUL:
|
||||
case OPCODE_DMOV:
|
||||
case OPCODE_DNE:
|
||||
case OPCODE_DRCP:
|
||||
case OPCODE_DTOF:
|
||||
case OPCODE_F16TOF32:
|
||||
case OPCODE_F32TOF16:
|
||||
case OPCODE_FIRSTBIT_HI:
|
||||
case OPCODE_FIRSTBIT_LO:
|
||||
case OPCODE_FIRSTBIT_SHI:
|
||||
case OPCODE_FTOD:
|
||||
case OPCODE_IBFE:
|
||||
case OPCODE_RCP:
|
||||
case OPCODE_UADDC:
|
||||
case OPCODE_UBFE:
|
||||
case OPCODE_USUBB:
|
||||
case OPCODE_MOVC:
|
||||
case OPCODE_DMOVC:
|
||||
return NULL;
|
||||
|
||||
// Special cases:
|
||||
case OPCODE_GATHER4:
|
||||
case OPCODE_GATHER4_C:
|
||||
case OPCODE_LD:
|
||||
case OPCODE_LD_MS:
|
||||
case OPCODE_LOD:
|
||||
case OPCODE_LD_UAV_TYPED:
|
||||
case OPCODE_LD_RAW:
|
||||
case OPCODE_SAMPLE:
|
||||
case OPCODE_SAMPLE_B:
|
||||
case OPCODE_SAMPLE_L:
|
||||
case OPCODE_SAMPLE_D:
|
||||
case OPCODE_RESINFO:
|
||||
return &psInst->asOperands[2];
|
||||
|
||||
case OPCODE_GATHER4_PO:
|
||||
case OPCODE_GATHER4_PO_C:
|
||||
case OPCODE_LD_STRUCTURED:
|
||||
return &psInst->asOperands[3];
|
||||
|
||||
case OPCODE_ISHL:
|
||||
case OPCODE_ISHR:
|
||||
case OPCODE_USHR:
|
||||
return &psInst->asOperands[1];
|
||||
|
||||
default:
|
||||
ASSERT(0);
|
||||
return NULL;
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Tweak the source operands of an instruction so that the rebased write mask will still work
|
||||
static void DoSrcOperandRebase(Operand *psOperand, uint32_t rebase)
|
||||
{
|
||||
uint32_t i;
|
||||
switch (psOperand->eSelMode)
|
||||
{
|
||||
default:
|
||||
case OPERAND_4_COMPONENT_MASK_MODE:
|
||||
ASSERT(psOperand->ui32CompMask == 0 || psOperand->ui32CompMask == OPERAND_4_COMPONENT_MASK_ALL);
|
||||
|
||||
// Special case for immediates, they do not have swizzles
|
||||
if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32)
|
||||
{
|
||||
if (psOperand->iNumComponents > 1)
|
||||
std::copy(&psOperand->afImmediates[rebase], &psOperand->afImmediates[4], &psOperand->afImmediates[0]);
|
||||
return;
|
||||
}
|
||||
if (psOperand->eType == OPERAND_TYPE_IMMEDIATE64)
|
||||
{
|
||||
if (psOperand->iNumComponents > 1)
|
||||
std::copy(&psOperand->adImmediates[rebase], &psOperand->adImmediates[4], &psOperand->adImmediates[0]);
|
||||
return;
|
||||
}
|
||||
|
||||
// Need to change this to swizzle
|
||||
psOperand->eSelMode = OPERAND_4_COMPONENT_SWIZZLE_MODE;
|
||||
psOperand->ui32Swizzle = 0;
|
||||
for (i = 0; i < 4 - rebase; i++)
|
||||
psOperand->aui32Swizzle[i] = i + rebase;
|
||||
for (; i < 4; i++)
|
||||
psOperand->aui32Swizzle[i] = rebase; // The first actual input.
|
||||
break;
|
||||
case OPERAND_4_COMPONENT_SELECT_1_MODE:
|
||||
// Nothing to do
|
||||
break;
|
||||
case OPERAND_4_COMPONENT_SWIZZLE_MODE:
|
||||
for (i = rebase; i < 4; i++)
|
||||
psOperand->aui32Swizzle[i - rebase] = psOperand->aui32Swizzle[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Renames temp register `oldReg` to `newReg` in psOperand (and, on request, in its suboperands),
// optionally shifting the accessed components down by `rebase`.
//
//  psOperand  operand to update; expected to belong to this instruction.
//  oldReg     temp register number to look for.
//  newReg     temp register number to write in its place.
//  compMask   components of oldReg being split off; operands touching any other component
//             are left alone.
//  flags      UD_CHANGE_SUBOPERANDS recurses into the suboperands (which are then processed
//             with UD_CHANGE_ALL); UD_CHANGE_MAIN_OPERAND enables the update of psOperand itself.
//  rebase     number of components to shift the operand's mask/swizzle down by; 0 only renames.
//
// When the rebased operand is a destination of this instruction, the source operands are
// adjusted as well so that they keep feeding the same destination components.
void Instruction::ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase)
{
    if (flags & UD_CHANGE_SUBOPERANDS)
    {
        for (uint32_t sub = 0; sub < MAX_SUB_OPERANDS; sub++)
        {
            Operand *psSubOperand = psOperand->m_SubOperands[sub].get();
            if (psSubOperand)
                ChangeOperandTempRegister(psSubOperand, oldReg, newReg, compMask, UD_CHANGE_ALL, rebase);
        }
    }

    // Only temps that currently name oldReg are of interest, and only if the caller asked
    // for the main operand to be changed.
    const bool changeMainOperand = (flags & UD_CHANGE_MAIN_OPERAND) != 0;
    if (!changeMainOperand)
        return;
    if (psOperand->eType != OPERAND_TYPE_TEMP || psOperand->ui32RegisterNumber != oldReg)
        return;

    const uint32_t accessMask = psOperand->GetAccessMask();
    // If this operation touches other components than the one(s) we're splitting, skip it
    if ((accessMask & (~compMask)) != 0)
    {
        // Verify that we've not messed up in reachability analysis.
        // This would mean that we've encountered an instruction that accesses
        // a component in multi-component mode and we're supposed to treat it as single-use only.
        // Now that we track operands we can bring this back
        ASSERT((accessMask & compMask) == 0);
        return;
    }

#if 0
    printf("Updating operand %d with access mask %X\n", (int)psOperand->id, accessMask);
#endif
    psOperand->ui32RegisterNumber = newReg;

    if (rebase == 0)
        return;

    // Update component mask. Note that we don't need to do anything to the suboperands. They do not affect destination writemask.
    switch (psOperand->eSelMode)
    {
        case OPERAND_4_COMPONENT_MASK_MODE:
        {
            // An all-zero mask is treated as "all components".
            const uint32_t oldMask = (psOperand->ui32CompMask == 0) ? OPERAND_4_COMPONENT_MASK_ALL : psOperand->ui32CompMask;

            // Check that we're not losing any information
            ASSERT((oldMask >> rebase) << rebase == oldMask);
            psOperand->ui32CompMask = (oldMask >> rebase);
            break;
        }
        case OPERAND_4_COMPONENT_SELECT_1_MODE:
            ASSERT(psOperand->aui32Swizzle[0] >= rebase);
            psOperand->aui32Swizzle[0] -= rebase;
            break;
        case OPERAND_4_COMPONENT_SWIZZLE_MODE:
            // Note that this rebase is different from the one done for source operands
            for (uint32_t comp = 0; comp < 4; comp++)
            {
                ASSERT(psOperand->aui32Swizzle[comp] >= rebase);
                psOperand->aui32Swizzle[comp] -= rebase;
            }
            break;
        default:
            ASSERT(0);
    }

    // Tweak operand datatypes: shift the per-component types down together with the components.
    std::copy(&psOperand->aeDataType[rebase], &psOperand->aeDataType[4], &psOperand->aeDataType[0]);

    // If this operand is a destination, we'll need to tweak sources as well.
    // Destinations are the operands that precede index ui32FirstSrc.
    bool isDestination = false;
    for (uint32_t dst = 0; dst < ui32FirstSrc && !isDestination; dst++)
        isDestination = (psOperand == &asOperands[dst]);

    if (!isDestination)
        return;

    // Nasty corner case of 2 destinations, not supported if both targets are written
    ASSERT((ui32FirstSrc < 2) || (asOperands[0].eType == OPERAND_TYPE_NULL) || (asOperands[1].eType == OPERAND_TYPE_NULL));

    // If we made it this far, we're rebasing a destination temp (and the only destination), need to tweak sources depending on the instruction
    switch (eOpcode)
    {
        // The opcodes that do not need tweaking:
        case OPCODE_DP2:
        case OPCODE_DP3:
        case OPCODE_DP4:
        case OPCODE_BUFINFO:
        case OPCODE_SAMPLE_C:
        case OPCODE_SAMPLE_C_LZ:
            return;
        default:
            break;
    }

    // Null means tweak all source operands
    Operand *psSwizzleOperand = GetSrcSwizzleOperand(this);
    if (psSwizzleOperand)
    {
        DoSrcOperandRebase(psSwizzleOperand, rebase);
        return;
    }

    for (uint32_t src = ui32FirstSrc; src < ui32NumOperands; src++)
        DoSrcOperandRebase(&asOperands[src], rebase);
}
|
||||
|
||||
|
||||
// Returns nonzero if psInst is a sample instruction and the sampler has medium or low precision
|
||||
bool Instruction::IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const
|
||||
{
|
||||
const Operand *op;
|
||||
const ResourceBinding *psBinding = NULL;
|
||||
OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT;
|
||||
switch (eOpcode)
|
||||
{
|
||||
default:
|
||||
return false;
|
||||
case OPCODE_SAMPLE:
|
||||
case OPCODE_SAMPLE_B:
|
||||
case OPCODE_SAMPLE_L:
|
||||
case OPCODE_SAMPLE_D:
|
||||
case OPCODE_SAMPLE_C:
|
||||
case OPCODE_SAMPLE_C_LZ:
|
||||
break;
|
||||
}
|
||||
|
||||
op = &asOperands[3];
|
||||
ASSERT(op->eType == OPERAND_TYPE_SAMPLER);
|
||||
|
||||
info.GetResourceFromBindingPoint(RGROUP_SAMPLER, op->ui32RegisterNumber, &psBinding);
|
||||
if (!psBinding)
|
||||
{
|
||||
/* Try to look from texture group */
|
||||
info.GetResourceFromBindingPoint(RGROUP_TEXTURE, op->ui32RegisterNumber, &psBinding);
|
||||
}
|
||||
|
||||
sType = Operand::ResourcePrecisionToOperandPrecision(psBinding ? psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN);
|
||||
|
||||
if (sType == OPERAND_MIN_PRECISION_DEFAULT)
|
||||
return false;
|
||||
|
||||
if (pType)
|
||||
*pType = sType;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
363
src/LoopTransform.cpp
Normal file
363
src/LoopTransform.cpp
Normal file
@ -0,0 +1,363 @@
|
||||
|
||||
#include "src/internal_includes/LoopTransform.h"
|
||||
#include "src/internal_includes/Shader.h"
|
||||
#include "src/internal_includes/debug.h"
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
|
||||
namespace HLSLcc
|
||||
{
|
||||
|
||||
struct LoopInfo
|
||||
{
|
||||
public:
|
||||
LoopInfo() : m_StartLoop(0), m_EndLoop(0), m_ExitPoints(), m_IsSwitch(false) {}
|
||||
|
||||
Instruction * m_StartLoop; // OPCODE_LOOP
|
||||
Instruction * m_EndLoop; // OPCODE_ENDLOOP that matches the LOOP above.
|
||||
std::vector<Instruction *> m_ExitPoints; // Any BREAK/RET/BREAKC instructions within the same loop depth
|
||||
bool m_IsSwitch; // True if this is a switch-case and not a LOOP/ENDLOOP pair. Used as a helper when parsing.
|
||||
};
|
||||
|
||||
typedef std::list<LoopInfo> Loops;
|
||||
|
||||
// Build a loopinfo array of all the loops in this shader phase
|
||||
void BuildLoopInfo(ShaderPhase &phase, Loops &res)
|
||||
{
|
||||
using namespace std;
|
||||
res.clear();
|
||||
|
||||
Instruction *i = &phase.psInst[0];
|
||||
// A stack of loopinfo elements (stored in res)
|
||||
list<LoopInfo *> loopStack;
|
||||
|
||||
// Storage for dummy LoopInfo elements to be used for switch-cases. We don't want them cluttering the Loops list so store them here.
|
||||
list<LoopInfo> dummyLIForSwitches;
|
||||
|
||||
while (i != &*phase.psInst.end())
|
||||
{
|
||||
if (i->eOpcode == OPCODE_LOOP)
|
||||
{
|
||||
LoopInfo *currLoopInfo = &*res.insert(res.end(), LoopInfo());
|
||||
currLoopInfo->m_StartLoop = i;
|
||||
loopStack.push_front(currLoopInfo);
|
||||
}
|
||||
else if(i->eOpcode == OPCODE_ENDLOOP)
|
||||
{
|
||||
ASSERT(!loopStack.empty());
|
||||
LoopInfo *li = *loopStack.begin();
|
||||
loopStack.pop_front();
|
||||
li->m_EndLoop = i;
|
||||
}
|
||||
else if (i->eOpcode == OPCODE_SWITCH)
|
||||
{
|
||||
// Create a dummy entry into the stack
|
||||
LoopInfo *li = &*dummyLIForSwitches.insert(dummyLIForSwitches.end(), LoopInfo());
|
||||
li->m_IsSwitch = true;
|
||||
loopStack.push_front(li);
|
||||
}
|
||||
else if (i->eOpcode == OPCODE_ENDSWITCH)
|
||||
{
|
||||
ASSERT(!loopStack.empty());
|
||||
LoopInfo *li = *loopStack.begin();
|
||||
loopStack.pop_front();
|
||||
ASSERT(li->m_IsSwitch);
|
||||
}
|
||||
else if (i->eOpcode == OPCODE_BREAK || i->eOpcode == OPCODE_BREAKC)
|
||||
{
|
||||
// Get the current loopstack head
|
||||
ASSERT(!loopStack.empty());
|
||||
LoopInfo *li = *loopStack.begin();
|
||||
// Ignore breaks from switch-cases
|
||||
if(!li->m_IsSwitch)
|
||||
{
|
||||
li->m_ExitPoints.push_back(i);
|
||||
}
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Returns true if the given instruction is a non-vectorized int or uint comparison instruction that reads from at least one temp and writes to a temp
|
||||
static bool IsScalarTempComparisonInstruction(const Instruction *i)
|
||||
{
|
||||
switch (i->eOpcode)
|
||||
{
|
||||
default:
|
||||
return false;
|
||||
case OPCODE_IGE:
|
||||
case OPCODE_ILT:
|
||||
case OPCODE_IEQ:
|
||||
case OPCODE_INE:
|
||||
case OPCODE_UGE:
|
||||
case OPCODE_ULT:
|
||||
break;
|
||||
}
|
||||
|
||||
if (i->asOperands[0].eType != OPERAND_TYPE_TEMP)
|
||||
return false;
|
||||
|
||||
int tempOp = -1;
|
||||
if (i->asOperands[1].eType == OPERAND_TYPE_TEMP)
|
||||
tempOp = 1;
|
||||
else if (i->asOperands[2].eType == OPERAND_TYPE_TEMP)
|
||||
tempOp = 2;
|
||||
|
||||
// Also reject comparisons where we compare temp.x vs temp.y
|
||||
if (i->asOperands[1].eType == OPERAND_TYPE_TEMP && i->asOperands[2].eType == OPERAND_TYPE_TEMP && i->asOperands[1].ui32RegisterNumber == i->asOperands[2].ui32RegisterNumber)
|
||||
return false;
|
||||
|
||||
if (tempOp == -1)
|
||||
return false;
|
||||
|
||||
if (i->asOperands[0].GetNumSwizzleElements() != 1)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns true iff both instructions perform identical operation. For the purposes of Loop transformation, we only consider operations of type tX = tX <op> imm32
|
||||
static bool AreInstructionsIdentical(const Instruction *a, const Instruction *b)
|
||||
{
|
||||
if (a->eOpcode != b->eOpcode)
|
||||
return false;
|
||||
ASSERT(a->ui32NumOperands == b->ui32NumOperands);
|
||||
uint32_t dstReg = 0;
|
||||
if (a->asOperands[0].eType != OPERAND_TYPE_TEMP)
|
||||
return false;
|
||||
dstReg = a->asOperands[0].ui32RegisterNumber;
|
||||
|
||||
for (uint32_t i = 0; i < a->ui32NumOperands; i++)
|
||||
{
|
||||
const Operand &aop = a->asOperands[i];
|
||||
const Operand &bop = b->asOperands[i];
|
||||
if (aop.eType != bop.eType)
|
||||
return false;
|
||||
|
||||
if (aop.GetAccessMask() != bop.GetAccessMask())
|
||||
return false;
|
||||
|
||||
if (aop.GetNumSwizzleElements() != 1)
|
||||
return false;
|
||||
|
||||
if (aop.eType == OPERAND_TYPE_TEMP)
|
||||
{
|
||||
if (aop.ui32RegisterNumber != bop.ui32RegisterNumber)
|
||||
return false;
|
||||
if (aop.ui32RegisterNumber != dstReg)
|
||||
return false;
|
||||
}
|
||||
else if (aop.eType == OPERAND_TYPE_IMMEDIATE32)
|
||||
{
|
||||
if (memcmp(aop.afImmediates, bop.afImmediates, 4 * sizeof(float)) != 0)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Attempt to transform a single loop into a for-statement
|
||||
static void AttemptLoopTransform(ShaderPhase &phase, LoopInfo &li)
|
||||
{
|
||||
// In order to transform a loop into a for, the following has to hold:
|
||||
// - The loop must start with a comparison instruction where one of the src operands is a temp (induction variable), followed by OPCODE_BREAKC.
|
||||
// - The loop must end with an arithmetic operation (SUB or ADD) where the dest operand is the same temp as one of the sources in the comparison instruction above
|
||||
// Additionally, if the loop induction variable is initialized before the start of the loop and it has only uses inside the LOOP/ENDLOOP pair, we can declare that inside the for statement.
|
||||
// Also, the loop induction variable must be standalone (as in, never used as part of a larger vector)
|
||||
|
||||
Instruction *cmpInst = li.m_StartLoop + 1;
|
||||
|
||||
if (!IsScalarTempComparisonInstruction(cmpInst))
|
||||
return;
|
||||
|
||||
Instruction *breakInst = li.m_StartLoop + 2;
|
||||
if (breakInst->eOpcode != OPCODE_BREAKC)
|
||||
return;
|
||||
if (breakInst->asOperands[0].eType != OPERAND_TYPE_TEMP)
|
||||
return;
|
||||
if (breakInst->asOperands[0].ui32RegisterNumber != cmpInst->asOperands[0].ui32RegisterNumber)
|
||||
return;
|
||||
|
||||
// Check that the comparison result isn't used anywhere else
|
||||
if (cmpInst->m_Uses.size() != 1)
|
||||
return;
|
||||
|
||||
ASSERT(cmpInst->m_Uses[0].m_Inst == breakInst);
|
||||
|
||||
// Ok, at least we have the comparison + breakc combo at top. Try to find the induction variable
|
||||
uint32_t inductionVarIdx = 0;
|
||||
|
||||
Instruction *lastInst = li.m_EndLoop - 1;
|
||||
if (lastInst->eOpcode != OPCODE_IADD)
|
||||
return;
|
||||
if (lastInst->asOperands[0].eType != OPERAND_TYPE_TEMP)
|
||||
return;
|
||||
|
||||
if (lastInst->asOperands[0].GetNumSwizzleElements() != 1)
|
||||
return;
|
||||
|
||||
uint32_t indVar = lastInst->asOperands[0].ui32RegisterNumber;
|
||||
// Verify that the induction variable actually matches.
|
||||
if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == indVar)
|
||||
inductionVarIdx = 1;
|
||||
else if (cmpInst->asOperands[2].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[2].ui32RegisterNumber == indVar)
|
||||
inductionVarIdx = 2;
|
||||
else
|
||||
return;
|
||||
|
||||
// Verify that we also read from the induction variable in the last instruction
|
||||
if (!((lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == indVar) ||
|
||||
(lastInst->asOperands[2].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[2].ui32RegisterNumber == indVar)))
|
||||
return;
|
||||
|
||||
// Nvidia compiler bug workaround: The shader compiler tries to be smart and unrolls constant loops,
|
||||
// but then fails miserably if the loop variable is used as an index to UAV loads/stores or some other cases ("array access too complex")
|
||||
// This is also triggered when the driver optimizer sees "simple enough" arithmetics (whatever that is) done on the loop variable before indexing.
|
||||
// So, disable for-loop transformation altogether whenever we see a UAV load or store inside a loop.
|
||||
for (auto itr = li.m_StartLoop; itr != li.m_EndLoop; itr++)
|
||||
{
|
||||
switch (itr->eOpcode)
|
||||
{
|
||||
case OPCODE_LD_RAW:
|
||||
case OPCODE_LD_STRUCTURED:
|
||||
case OPCODE_LD_UAV_TYPED:
|
||||
case OPCODE_STORE_RAW:
|
||||
case OPCODE_STORE_STRUCTURED:
|
||||
case OPCODE_STORE_UAV_TYPED:
|
||||
return; // Nope, can't do a for, not even a partial one.
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// One more thing to check: The comparison input may only see 1 definition that originates from inside the loop range: the one in lastInst.
|
||||
// Anything else means that there's a continue statement, or another break/breakc and that means that lastInst wouldn't get called.
|
||||
// Of course, if all those instructions are identical, then it's fine.
|
||||
// Ideally, if there's only one definition that's from outside the loop range, then we can use that as the initializer, as well.
|
||||
|
||||
Instruction *initializer = NULL;
|
||||
std::vector<const Operand::Define *> definitionsOutsideRange;
|
||||
std::vector<const Operand::Define *> definitionsInsideRange;
|
||||
std::for_each(cmpInst->asOperands[inductionVarIdx].m_Defines.begin(), cmpInst->asOperands[inductionVarIdx].m_Defines.end(), [&](const Operand::Define &def)
|
||||
{
|
||||
if (def.m_Inst < li.m_StartLoop || def.m_Inst > li.m_EndLoop)
|
||||
definitionsOutsideRange.push_back(&def);
|
||||
else
|
||||
definitionsInsideRange.push_back(&def);
|
||||
});
|
||||
|
||||
if (definitionsInsideRange.size() != 1)
|
||||
{
|
||||
// All definitions must be identical
|
||||
for (std::vector<const Operand::Define*>::iterator itr = definitionsInsideRange.begin()+1; itr != definitionsInsideRange.end(); itr++)
|
||||
{
|
||||
if (!AreInstructionsIdentical((*itr)->m_Inst, definitionsInsideRange[0]->m_Inst))
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT(definitionsOutsideRange.size() > 0);
|
||||
if (definitionsOutsideRange.size() == 1)
|
||||
initializer = definitionsOutsideRange[0]->m_Inst;
|
||||
|
||||
// Initializer must only write to one component
|
||||
if (initializer && initializer->asOperands[0].GetNumSwizzleElements() != 1)
|
||||
initializer = 0;
|
||||
|
||||
// Check that the initializer is only used within the range so we can move it to for statement
|
||||
if (initializer)
|
||||
{
|
||||
bool hasUsesOutsideRange = false;
|
||||
std::for_each(initializer->m_Uses.begin(), initializer->m_Uses.end(), [&](const Instruction::Use &u)
|
||||
{
|
||||
if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop)
|
||||
hasUsesOutsideRange = true;
|
||||
});
|
||||
// Has outside uses? we cannot pull that up to the for statement
|
||||
if (hasUsesOutsideRange)
|
||||
initializer = 0;
|
||||
}
|
||||
|
||||
// Check that the loop adder instruction only has uses inside the loop range, otherwise we cannot move the initializer either
|
||||
if (initializer)
|
||||
{
|
||||
bool cannotDoInitializer = false;
|
||||
for (auto itr = lastInst->m_Uses.begin(); itr != lastInst->m_Uses.end(); itr++)
|
||||
{
|
||||
const Instruction::Use &u = *itr;
|
||||
if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop)
|
||||
{
|
||||
cannotDoInitializer = true;
|
||||
break;
|
||||
}
|
||||
// Also check that the uses are not vector ops (temp splitting has already pulled everything to .x if this is a standalone var)
|
||||
if (u.m_Op->GetAccessMask() != 1)
|
||||
{
|
||||
cannotDoInitializer = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Has outside uses? we cannot pull that up to the for statement
|
||||
if (cannotDoInitializer)
|
||||
initializer = 0;
|
||||
}
|
||||
|
||||
|
||||
if (initializer)
|
||||
{
|
||||
// We can declare the initializer in the for loop header, allocate a new number for it and change all uses into that.
|
||||
uint32_t newRegister = phase.m_NextFreeTempRegister++;
|
||||
li.m_StartLoop->m_InductorRegister = newRegister;
|
||||
std::for_each(initializer->m_Uses.begin(), initializer->m_Uses.end(), [newRegister](const Instruction::Use &u)
|
||||
{
|
||||
u.m_Op->m_ForLoopInductorName = newRegister;
|
||||
});
|
||||
// Also tweak the destinations for cmpInst, and lastInst
|
||||
if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber)
|
||||
cmpInst->asOperands[1].m_ForLoopInductorName = newRegister;
|
||||
else
|
||||
cmpInst->asOperands[2].m_ForLoopInductorName = newRegister;
|
||||
|
||||
if (lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber)
|
||||
lastInst->asOperands[1].m_ForLoopInductorName = newRegister;
|
||||
else
|
||||
lastInst->asOperands[2].m_ForLoopInductorName = newRegister;
|
||||
|
||||
lastInst->asOperands[0].m_ForLoopInductorName = newRegister;
|
||||
initializer->asOperands[0].m_ForLoopInductorName = newRegister;
|
||||
}
|
||||
|
||||
// This loop can be transformed to for-loop. Do the necessary magicks.
|
||||
li.m_StartLoop->m_LoopInductors[0] = initializer;
|
||||
li.m_StartLoop->m_LoopInductors[1] = cmpInst;
|
||||
li.m_StartLoop->m_LoopInductors[2] = breakInst;
|
||||
li.m_StartLoop->m_LoopInductors[3] = lastInst;
|
||||
|
||||
if (initializer)
|
||||
initializer->m_SkipTranslation = true;
|
||||
cmpInst->m_SkipTranslation = true;
|
||||
breakInst->m_SkipTranslation = true;
|
||||
lastInst->m_SkipTranslation = true;
|
||||
|
||||
}
|
||||
|
||||
// Collects every LOOP/ENDLOOP pair of `phase` and tries to turn each one into a for-statement.
void DoLoopTransform(ShaderPhase &phase)
{
    Loops loops;
    BuildLoopInfo(phase, loops);

    for (LoopInfo &li : loops)
    {
        // Some sanity checks: start and end points must be initialized, we shouldn't have any switches here, and each loop must have at least one exit point
        // Also that there's at least 2 instructions in loop body
        ASSERT(li.m_StartLoop != 0);
        ASSERT(li.m_EndLoop != 0);
        ASSERT(li.m_EndLoop > li.m_StartLoop + 2);
        ASSERT(!li.m_IsSwitch);
        ASSERT(!li.m_ExitPoints.empty());
        AttemptLoopTransform(phase, li);
    }
}
|
||||
};
|
586
src/Operand.cpp
Normal file
586
src/Operand.cpp
Normal file
@ -0,0 +1,586 @@
|
||||
|
||||
#include "internal_includes/Operand.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/HLSLccToolkit.h"
|
||||
#include "internal_includes/Shader.h"
|
||||
#include "internal_includes/HLSLCrossCompilerContext.h"
|
||||
#include "internal_includes/Instruction.h"
|
||||
|
||||
uint32_t Operand::GetAccessMask() const
|
||||
{
|
||||
int i;
|
||||
uint32_t accessMask = 0;
|
||||
// TODO: Destination writemask can (AND DOES) affect access from sources, but do it conservatively for now.
|
||||
switch (eSelMode)
|
||||
{
|
||||
default:
|
||||
case OPERAND_4_COMPONENT_MASK_MODE:
|
||||
// Update access mask
|
||||
accessMask = ui32CompMask;
|
||||
if (accessMask == 0)
|
||||
accessMask = OPERAND_4_COMPONENT_MASK_ALL;
|
||||
break;
|
||||
|
||||
case OPERAND_4_COMPONENT_SWIZZLE_MODE:
|
||||
accessMask = 0;
|
||||
for (i = 0; i < 4; i++)
|
||||
accessMask |= 1 << (aui32Swizzle[i]);
|
||||
break;
|
||||
|
||||
case OPERAND_4_COMPONENT_SELECT_1_MODE:
|
||||
accessMask = 1 << (aui32Swizzle[0]);
|
||||
break;
|
||||
|
||||
}
|
||||
ASSERT(accessMask != 0);
|
||||
return accessMask;
|
||||
}
|
||||
|
||||
int Operand::GetMaxComponent() const
|
||||
{
|
||||
if (iWriteMaskEnabled &&
|
||||
iNumComponents == 4)
|
||||
{
|
||||
//Component Mask
|
||||
if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE)
|
||||
{
|
||||
if (ui32CompMask != 0 && ui32CompMask != (OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z | OPERAND_4_COMPONENT_MASK_W))
|
||||
{
|
||||
if (ui32CompMask & OPERAND_4_COMPONENT_MASK_W)
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Z)
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Y)
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
if (ui32CompMask & OPERAND_4_COMPONENT_MASK_X)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
//Component Swizzle
|
||||
if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
|
||||
{
|
||||
if (ui32Swizzle == NO_SWIZZLE)
|
||||
return 4;
|
||||
|
||||
uint32_t res = 0;
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
res = std::max(aui32Swizzle[i], res);
|
||||
}
|
||||
return (int)res + 1;
|
||||
}
|
||||
else
|
||||
if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 4;
|
||||
}
|
||||
|
||||
//Single component repeated
|
||||
//e..g .wwww
|
||||
bool Operand::IsSwizzleReplicated() const
|
||||
{
|
||||
if (iWriteMaskEnabled &&
|
||||
iNumComponents == 4)
|
||||
{
|
||||
if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
|
||||
{
|
||||
if (ui32Swizzle == WWWW_SWIZZLE ||
|
||||
ui32Swizzle == ZZZZ_SWIZZLE ||
|
||||
ui32Swizzle == YYYY_SWIZZLE ||
|
||||
ui32Swizzle == XXXX_SWIZZLE)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Get the number of elements returned by operand, taking additional component mask into account
|
||||
uint32_t Operand::GetNumSwizzleElements(uint32_t _ui32CompMask /* = OPERAND_4_COMPONENT_MASK_ALL */) const
|
||||
{
|
||||
uint32_t count = 0;
|
||||
|
||||
switch (eType)
|
||||
{
|
||||
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED:
|
||||
return 1; // TODO: does mask make any sense here?
|
||||
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
|
||||
case OPERAND_TYPE_INPUT_THREAD_ID:
|
||||
case OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
|
||||
// Adjust component count and break to more processing
|
||||
((Operand *)this)->iNumComponents = 3;
|
||||
break;
|
||||
case OPERAND_TYPE_IMMEDIATE32:
|
||||
case OPERAND_TYPE_IMMEDIATE64:
|
||||
case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL:
|
||||
case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL:
|
||||
case OPERAND_TYPE_OUTPUT_DEPTH:
|
||||
{
|
||||
// Translate numComponents into bitmask
|
||||
// 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15
|
||||
uint32_t compMask = (1 << iNumComponents) - 1;
|
||||
|
||||
compMask &= _ui32CompMask;
|
||||
// Calculate bits left in compMask
|
||||
return HLSLcc::GetNumberBitsSet(compMask);
|
||||
}
|
||||
default:
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (iWriteMaskEnabled &&
|
||||
iNumComponents != 1)
|
||||
{
|
||||
//Component Mask
|
||||
if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE)
|
||||
{
|
||||
uint32_t compMask = ui32CompMask;
|
||||
if (compMask == 0)
|
||||
compMask = OPERAND_4_COMPONENT_MASK_ALL;
|
||||
compMask &= _ui32CompMask;
|
||||
|
||||
if (compMask == OPERAND_4_COMPONENT_MASK_ALL)
|
||||
return 4;
|
||||
|
||||
if (compMask & OPERAND_4_COMPONENT_MASK_X)
|
||||
{
|
||||
count++;
|
||||
}
|
||||
if (compMask & OPERAND_4_COMPONENT_MASK_Y)
|
||||
{
|
||||
count++;
|
||||
}
|
||||
if (compMask & OPERAND_4_COMPONENT_MASK_Z)
|
||||
{
|
||||
count++;
|
||||
}
|
||||
if (compMask & OPERAND_4_COMPONENT_MASK_W)
|
||||
{
|
||||
count++;
|
||||
}
|
||||
}
|
||||
else
|
||||
//Component Swizzle
|
||||
if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
|
||||
{
|
||||
uint32_t i;
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
if ((_ui32CompMask & (1 << i)) == 0)
|
||||
continue;
|
||||
|
||||
count++;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
|
||||
{
|
||||
if (aui32Swizzle[0] == OPERAND_4_COMPONENT_X && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_X))
|
||||
{
|
||||
count++;
|
||||
}
|
||||
else
|
||||
if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Y && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Y))
|
||||
{
|
||||
count++;
|
||||
}
|
||||
else
|
||||
if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Z && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Z))
|
||||
{
|
||||
count++;
|
||||
}
|
||||
else
|
||||
if (aui32Swizzle[0] == OPERAND_4_COMPONENT_W && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_W))
|
||||
{
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
//Component Select 1
|
||||
}
|
||||
|
||||
if (!count)
|
||||
{
|
||||
// Translate numComponents into bitmask
|
||||
// 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15
|
||||
uint32_t compMask = (1 << iNumComponents) - 1;
|
||||
|
||||
compMask &= _ui32CompMask;
|
||||
// Calculate bits left in compMask
|
||||
return HLSLcc::GetNumberBitsSet(compMask);
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
// Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch
|
||||
int Operand::GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const
{
    // Anything that is not a hull or domain shader always uses register space 0.
    const bool isHullShader = (eShaderType == HULL_SHADER);
    const bool isDomainShader = (eShaderType == DOMAIN_SHADER);
    if (!isHullShader && !isDomainShader)
        return 0;

    // Within hull/domain shaders, these three situations still map to space 0.
    const bool inHullControlPointPhase = isHullShader && (eShaderPhaseType == HS_CTRL_POINT_PHASE);
    const bool isDomainShaderOutput = isDomainShader && (eType == OPERAND_TYPE_OUTPUT);
    const bool isControlPointOperand = (eType == OPERAND_TYPE_INPUT_CONTROL_POINT) ||
        (eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT);

    return (inHullControlPointPhase || isDomainShaderOutput || isControlPointOperand) ? 0 : 1;
}
|
||||
|
||||
int Operand::GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const
|
||||
{
|
||||
return GetRegisterSpace(psContext->psShader->eShaderType, psContext->psShader->asPhases[psContext->currentPhase].ePhase);
|
||||
}
|
||||
|
||||
// Maps a signature entry to the shader variable type used for its register.
// A non-default min-precision on the signature takes priority; otherwise the
// component type decides (uint32 -> SVT_UINT, sint32 -> SVT_INT, else SVT_FLOAT).
// targetIsMetal selects SVT_FLOAT16 instead of SVT_FLOAT10 for MIN_PRECISION_FLOAT_2_8.
static SHADER_VARIABLE_TYPE OperandSignatureToDataType(const ShaderInfo::InOutSignature &sig, bool targetIsMetal)
{
    if (sig.eMinPrec != MIN_PRECISION_DEFAULT)
    {
        switch (sig.eMinPrec)
        {
            case MIN_PRECISION_FLOAT_16:
                return SVT_FLOAT16;
            case MIN_PRECISION_FLOAT_2_8:
                return targetIsMetal ? SVT_FLOAT16 : SVT_FLOAT10;
            case MIN_PRECISION_SINT_16:
                return SVT_INT16;
            case MIN_PRECISION_UINT_16:
                return SVT_UINT16;
            default:
                // Unknown precision value: flag it, then fall back to the component type.
                ASSERT(0);
                break;
        }
    }

    if (sig.eComponentType == INOUT_COMPONENT_UINT32)
        return SVT_UINT;
    if (sig.eComponentType == INOUT_COMPONENT_SINT32)
        return SVT_INT;
    return SVT_FLOAT;
}

// Returns the data type of this operand.
//
// psContext                    - translation context; used to look up signatures and constant buffers.
// ePreferredTypeForImmediates  - type reported for 32-bit immediates (declared default: SVT_INT).
//
// Resolution order: the operand's own min-precision qualifier, then a per-operand-type
// lookup (temp register analysis data, in/out signatures, constant buffer reflection, ...).
// Anything that cannot be resolved is reported as SVT_FLOAT.
SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates /* = SVT_INT */) const
{
    // The min precision qualifier overrides all of the stuff below
    switch (eMinPrecision)
    {
        case OPERAND_MIN_PRECISION_FLOAT_16:
            return SVT_FLOAT16;
        case OPERAND_MIN_PRECISION_FLOAT_2_8:
            return SVT_FLOAT10;
        case OPERAND_MIN_PRECISION_SINT_16:
            return SVT_INT16;
        case OPERAND_MIN_PRECISION_UINT_16:
            return SVT_UINT16;
        default:
            break;
    }

    switch (eType)
    {
        case OPERAND_TYPE_TEMP:
        {
            if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
            {
                return aeDataType[aui32Swizzle[0]];
            }
            if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
            {
                if (ui32Swizzle == (NO_SWIZZLE))
                {
                    return aeDataType[0];
                }

                return aeDataType[aui32Swizzle[0]];
            }

            if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE)
            {
                // Initialised so that a mask with none of the low four bits set
                // yields a defined result instead of an indeterminate value.
                SHADER_VARIABLE_TYPE eCurrentType = SVT_FLOAT;
                int i = 0;

                // An empty mask means "all components"
                uint32_t mask = ui32CompMask;
                if (!mask)
                {
                    mask = OPERAND_4_COMPONENT_MASK_ALL;
                }

                // The type of the first enabled component is the operand type
                for (; i < 4; ++i)
                {
                    if (mask & (1 << i))
                    {
                        eCurrentType = aeDataType[i];
                        break;
                    }
                }

#ifdef _DEBUG
                //Check if all elements have the same basic type.
                for (; i < 4; ++i)
                {
                    if (mask & (1 << i))
                    {
                        if (eCurrentType != aeDataType[i])
                        {
                            ASSERT(0);
                        }
                    }
                }
#endif
                return eCurrentType;
            }

            // Unknown selection mode
            ASSERT(0);
            break;
        }
        case OPERAND_TYPE_OUTPUT:
        {
            const uint32_t ui32Register = ui32RegisterNumber;
            int regSpace = GetRegisterSpace(psContext);
            const ShaderInfo::InOutSignature* psOut = NULL;

            if (regSpace == 0)
                psContext->psShader->sInfo.GetOutputSignatureFromRegister(ui32Register, GetAccessMask(), psContext->psShader->ui32CurrentVertexOutputStream,
                    &psOut);
            else
                psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psOut);

            ASSERT(psOut != NULL);
            // When ASSERT compiles to nothing a failed lookup must not be dereferenced.
            if (psOut == NULL)
                return SVT_FLOAT;

            return OperandSignatureToDataType(*psOut, psContext->psShader->eTargetLanguage == LANG_METAL);
        }
        case OPERAND_TYPE_INPUT:
        {
            const uint32_t ui32Register = aui32ArraySizes[iIndexDims - 1];
            int regSpace = GetRegisterSpace(psContext);
            const ShaderInfo::InOutSignature* psIn = NULL;

            if (regSpace == 0)
            {
                if (psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[ui32Register] != 0)
                    return SVT_FLOAT; // All combined inputs are stored as floats
                psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Register, GetAccessMask(),
                    &psIn);
            }
            else
            {
                if (psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[ui32Register] != 0)
                    return SVT_FLOAT; // All combined inputs are stored as floats
                psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psIn);
            }

            ASSERT(psIn != NULL);

            // The operand's own special name is checked first; it does not need the signature.
            switch (eSpecialName)
            {
                //UINT in DX, INT in GL.
                case NAME_PRIMITIVE_ID:
                case NAME_VERTEX_ID:
                case NAME_INSTANCE_ID:
                case NAME_RENDER_TARGET_ARRAY_INDEX:
                case NAME_VIEWPORT_ARRAY_INDEX:
                case NAME_SAMPLE_INDEX:
                    return SVT_INT;

                case NAME_IS_FRONT_FACE:
                    return SVT_UINT;

                case NAME_POSITION:
                case NAME_CLIP_DISTANCE:
                    return SVT_FLOAT;

                default:
                    break;
            }

            // Everything below reads the signature; bail out if the lookup failed
            // (only reachable when ASSERT compiles to nothing).
            if (psIn == NULL)
                return SVT_FLOAT;

            switch (psIn->eSystemValueType)
            {
                case NAME_IS_FRONT_FACE:
                    return SVT_UINT;

                //UINT in DX, INT in GL.
                case NAME_INSTANCE_ID:
                case NAME_PRIMITIVE_ID:
                case NAME_VERTEX_ID:
                case NAME_RENDER_TARGET_ARRAY_INDEX:
                case NAME_VIEWPORT_ARRAY_INDEX:
                case NAME_SAMPLE_INDEX:
                    return SVT_INT;

                default:
                    break;
            }

            return OperandSignatureToDataType(*psIn, psContext->psShader->eTargetLanguage == LANG_METAL);
        }
        case OPERAND_TYPE_CONSTANT_BUFFER:
        {
            const ConstantBuffer* psCBuf = NULL;
            const ShaderVarType* psVarType = NULL;
            int32_t rebase = -1;
            bool isArray = false;
            int foundVar = 0;
            psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, aui32ArraySizes[0], &psCBuf);
            if (psCBuf)
            {
                foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags);
                if (foundVar && m_SubOperands[1].get() == NULL) // TODO: why this suboperand thing?
                {
                    return psVarType->Type;
                }
            }
            else
            {
                // Todo: this isn't correct yet.
                return SVT_FLOAT;
            }
            break;
        }
        case OPERAND_TYPE_IMMEDIATE32:
        {
            return ePreferredTypeForImmediates;
        }

        case OPERAND_TYPE_IMMEDIATE64:
        {
            return SVT_DOUBLE;
        }

        case OPERAND_TYPE_INPUT_THREAD_ID:
        case OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
        case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
        case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED:
        {
            return SVT_UINT;
        }
        case OPERAND_TYPE_SPECIAL_ADDRESS:
        case OPERAND_TYPE_SPECIAL_LOOPCOUNTER:
        case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID:
        case OPERAND_TYPE_INPUT_PRIMITIVEID:
        {
            return SVT_INT;
        }
        case OPERAND_TYPE_INPUT_GS_INSTANCE_ID:
        {
            return SVT_UINT;
        }
        case OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
        {
            return SVT_INT;
        }
        case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
        {
            return SVT_INT;
        }
        case OPERAND_TYPE_INDEXABLE_TEMP: // Indexable temps are always floats
        case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: // So are const arrays currently
        default:
        {
            return SVT_FLOAT;
        }
    }

    // Reached from the cases above that break instead of returning.
    return SVT_FLOAT;
}
|
||||
|
||||
// Converts a reflected resource precision into the matching operand min-precision.
// HIGHP maps to the default precision and MEDIUMP to 16-bit float; every other value
// (LOWP, UNKNOWN and anything unrecognised) maps to the 2.8 float format.
OPERAND_MIN_PRECISION Operand::ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec)
{
    if (ePrec == REFLECT_RESOURCE_PRECISION_HIGHP)
        return OPERAND_MIN_PRECISION_DEFAULT;

    if (ePrec == REFLECT_RESOURCE_PRECISION_MEDIUMP)
        return OPERAND_MIN_PRECISION_FLOAT_16;

    return OPERAND_MIN_PRECISION_FLOAT_2_8;
}
|
||||
|
||||
int Operand::GetNumInputElements(const HLSLCrossCompilerContext *psContext) const
|
||||
{
|
||||
const ShaderInfo::InOutSignature *psSig = NULL;
|
||||
int regSpace = GetRegisterSpace(psContext);
|
||||
|
||||
switch (eType)
|
||||
{
|
||||
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED:
|
||||
case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID:
|
||||
case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
|
||||
return 1;
|
||||
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
|
||||
case OPERAND_TYPE_INPUT_THREAD_ID:
|
||||
case OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
|
||||
case OPERAND_TYPE_INPUT_DOMAIN_POINT:
|
||||
return 3;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (regSpace == 0)
|
||||
psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig);
|
||||
else
|
||||
psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig);
|
||||
|
||||
ASSERT(psSig != NULL);
|
||||
|
||||
// TODO: Are there ever any cases where the mask has 'holes'?
|
||||
return HLSLcc::GetNumberBitsSet(psSig->ui32Mask);
|
||||
}
|
1018
src/Shader.cpp
Normal file
1018
src/Shader.cpp
Normal file
File diff suppressed because it is too large
Load Diff
387
src/ShaderInfo.cpp
Normal file
387
src/ShaderInfo.cpp
Normal file
@ -0,0 +1,387 @@
|
||||
|
||||
#include "ShaderInfo.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/tokens.h"
|
||||
#include "Operand.h"
|
||||
#include <stdlib.h>
|
||||
#include <sstream>
|
||||
|
||||
// Returns the data type of the resource bound at register regNo in the texture group.
// The binding is expected to exist; a failed lookup trips the assertion.
SHADER_VARIABLE_TYPE ShaderInfo::GetTextureDataType(uint32_t regNo)
{
    const ResourceBinding* psTextureBinding = 0;
    const int lookupSucceeded = GetResourceFromBindingPoint(RGROUP_TEXTURE, regNo, &psTextureBinding);
    ASSERT(lookupSucceeded != 0);
    return psTextureBinding->GetDataType();
}
|
||||
|
||||
void ShaderInfo::GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const
|
||||
{
|
||||
ASSERT(ui32MajorVersion > 3);
|
||||
*ppsConstBuf = &psConstantBuffers[aui32ResourceMap[eGroup][ui32BindPoint]];
|
||||
}
|
||||
|
||||
int ShaderInfo::GetResourceFromBindingPoint(const ResourceGroup eGroup, uint32_t const ui32BindPoint, const ResourceBinding** ppsOutBinding) const
|
||||
{
|
||||
size_t i;
|
||||
const size_t ui32NumBindings = psResourceBindings.size();
|
||||
const ResourceBinding* psBindings = &psResourceBindings[0];
|
||||
|
||||
for (i = 0; i < ui32NumBindings; ++i)
|
||||
{
|
||||
if (ResourceTypeToResourceGroup(psBindings[i].eType) == eGroup)
|
||||
{
|
||||
if (ui32BindPoint >= psBindings[i].ui32BindPoint && ui32BindPoint < (psBindings[i].ui32BindPoint + psBindings[i].ui32BindCount))
|
||||
{
|
||||
*ppsOutBinding = psBindings + i;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ShaderInfo::GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const
|
||||
{
|
||||
size_t i;
|
||||
const size_t ui32NumVars = psThisPointerConstBuffer->asVars.size();
|
||||
|
||||
for (i = 0; i < ui32NumVars; ++i)
|
||||
{
|
||||
if (ui32Offset >= psThisPointerConstBuffer->asVars[i].ui32StartOffset &&
|
||||
ui32Offset < (psThisPointerConstBuffer->asVars[i].ui32StartOffset + psThisPointerConstBuffer->asVars[i].ui32Size))
|
||||
{
|
||||
*ppsShaderVar = &psThisPointerConstBuffer->asVars[i];
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ShaderInfo::GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const
|
||||
{
|
||||
size_t i;
|
||||
const size_t ui32NumVars = psInputSignatures.size();
|
||||
|
||||
for (i = 0; i < ui32NumVars; ++i)
|
||||
{
|
||||
if ((ui32Register == psInputSignatures[i].ui32Register) && (((~psInputSignatures[i].ui32Mask) & ui32Mask) == 0))
|
||||
{
|
||||
*ppsOut = &psInputSignatures[i];
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
ASSERT(allowNull);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ShaderInfo::GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const
|
||||
{
|
||||
size_t i;
|
||||
const size_t ui32NumVars = psPatchConstantSignatures.size();
|
||||
|
||||
for (i = 0; i < ui32NumVars; ++i)
|
||||
{
|
||||
if ((ui32Register == psPatchConstantSignatures[i].ui32Register) && (((~psPatchConstantSignatures[i].ui32Mask) & ui32Mask) == 0))
|
||||
{
|
||||
*ppsOut = &psPatchConstantSignatures[i];
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (allowNull)
|
||||
return 0;
|
||||
|
||||
// There are situations (especially when using dcl_indexrange) where the compiler happily writes outside the actual masks.
|
||||
// In those situations just take the last signature that uses that register (it's typically the "highest" one)
|
||||
for (i = ui32NumVars - 1; i != 0xffffffff; i--)
|
||||
{
|
||||
if (ui32Register == psPatchConstantSignatures[i].ui32Register)
|
||||
{
|
||||
*ppsOut = &psPatchConstantSignatures[i];
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
ASSERT(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ShaderInfo::GetOutputSignatureFromRegister(const uint32_t ui32Register,
|
||||
const uint32_t ui32CompMask,
|
||||
const uint32_t ui32Stream,
|
||||
const InOutSignature** ppsOut,
|
||||
bool allowNull /* = false */) const
|
||||
{
|
||||
size_t i;
|
||||
const size_t ui32NumVars = psOutputSignatures.size();
|
||||
ASSERT(ui32CompMask != 0);
|
||||
|
||||
for (i = 0; i < ui32NumVars; ++i)
|
||||
{
|
||||
if (ui32Register == psOutputSignatures[i].ui32Register &&
|
||||
(ui32CompMask & psOutputSignatures[i].ui32Mask) &&
|
||||
ui32Stream == psOutputSignatures[i].ui32Stream)
|
||||
{
|
||||
*ppsOut = &psOutputSignatures[i];
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
ASSERT(allowNull);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Finds the output signature with the given system value type and semantic index.
// Returns 1 and stores the signature in *ppsOut on success; asserts and returns 0
// when no output signature matches.
int ShaderInfo::GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const
{
    for (const InOutSignature &sig : psOutputSignatures)
    {
        if (eSystemValueType != sig.eSystemValueType)
            continue;
        if (ui32SemanticIndex != sig.ui32SemanticIndex)
            continue;

        *ppsOut = &sig;
        return 1;
    }

    ASSERT(0);
    return 0;
}
|
||||
|
||||
static uint32_t GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors)
|
||||
{
|
||||
// Struct size is calculated from the offset and size of its last member
|
||||
if (psType->Class == SVC_STRUCT)
|
||||
{
|
||||
return psType->Members.back().Offset + GetCBVarSize(&psType->Members.back(), matrixAsVectors);
|
||||
}
|
||||
|
||||
// Matrices represented as vec4 arrays have special size calculation
|
||||
if (matrixAsVectors)
|
||||
{
|
||||
if (psType->Class == SVC_MATRIX_ROWS)
|
||||
{
|
||||
return psType->Rows * 16;
|
||||
}
|
||||
else if (psType->Class == SVC_MATRIX_COLUMNS)
|
||||
{
|
||||
return psType->Columns * 16;
|
||||
}
|
||||
}
|
||||
|
||||
// Regular matrices, vectors and scalars
|
||||
return psType->Columns * psType->Rows * 4;
|
||||
}
|
||||
|
||||
// Checks whether the byte offset offsetToFind falls inside the variable described by
// psType (located at parentOffset + psType->Offset) and, if so, resolves it to the
// innermost matching type.
//
// psType        - type to test; struct members are searched recursively.
// parentOffset  - byte offset of the enclosing variable/struct.
// offsetToFind  - absolute byte offset being looked up.
// isArray       - out: set to true when the match is a matrix or an array of
//                 scalars/vectors, false otherwise (only written on a match).
// arrayIndices  - optional out: receives the array indices encountered on the way
//                 from the outermost type to the match. May be NULL.
// pi32Rebase    - optional out: for a non-array vector, receives the vector's start
//                 offset within its 16-byte slot. May be NULL.
// flags         - HLSLCC_FLAG_TRANSLATE_MATRICES selects vec4-array sizing for matrices.
//
// Returns the matching type, or NULL when the offset is outside this variable.
static const ShaderVarType* IsOffsetInType(const ShaderVarType* psType,
    uint32_t parentOffset,
    uint32_t offsetToFind,
    bool* isArray,
    std::vector<uint32_t>* arrayIndices,
    int32_t* pi32Rebase,
    uint32_t flags)
{
    const uint32_t thisOffset = parentOffset + psType->Offset;
    const uint32_t thisSize = GetCBVarSize(psType, (flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0);

    // Element size rounded up to the next multiple of 16 bytes (one vec4)
    uint32_t paddedSize = thisSize;
    if (thisSize % 16 > 0)
        paddedSize += (16 - (thisSize % 16));

    // Array elements are padded to align on vec4 size, except for the last one
    uint32_t arraySize = thisSize;
    if (psType->Elements)
        arraySize = (paddedSize * (psType->Elements - 1)) + thisSize;

    const bool offsetInside = (offsetToFind >= thisOffset) && (offsetToFind < (thisOffset + arraySize));
    if (!offsetInside)
        return NULL;

    *isArray = false;
    if (psType->Class == SVC_STRUCT)
    {
        const uint32_t offsetInArray = offsetToFind - thisOffset;

        // Consecutive struct elements are paddedSize bytes apart (see arraySize above),
        // so the element index must be derived from the padded stride, not the raw size.
        if (psType->Elements > 1 && arrayIndices != NULL)
            arrayIndices->push_back(offsetInArray / paddedSize);

        // Need to bring offset back to element zero in case of array of structs
        const uint32_t offsetInStruct = offsetInArray % paddedSize;

        for (uint32_t m = 0; m < psType->MemberCount; ++m)
        {
            const ShaderVarType* psMember = &psType->Members[m];

            const ShaderVarType* foundType = IsOffsetInType(psMember, thisOffset, thisOffset + offsetInStruct, isArray, arrayIndices, pi32Rebase, flags);
            if (foundType != NULL)
                return foundType;
        }
        // No member claimed the offset: report the struct itself.
    }
    // Check for array of scalars or vectors (both take up 16 bytes per element).
    // Matrices are also treated as arrays of vectors.
    else if ((psType->Class == SVC_MATRIX_ROWS || psType->Class == SVC_MATRIX_COLUMNS) ||
             ((psType->Class == SVC_SCALAR || psType->Class == SVC_VECTOR) && psType->Elements > 1))
    {
        *isArray = true;
        if (arrayIndices != NULL)
            arrayIndices->push_back((offsetToFind - thisOffset) / 16);
    }
    else if (psType->Class == SVC_VECTOR)
    {
        //Check for vector starting at a non-vec4 offset.

        // cbuffer $Globals
        // {
        //
        //   float angle;                       // Offset:    0 Size:     4
        //   float2 angle2;                     // Offset:    4 Size:     8
        //
        // }

        //cb0[0].x = angle
        //cb0[0].yzyy = angle2.xyxx

        //Rebase angle2 so that .y maps to .x, .z maps to .y

        if (pi32Rebase != NULL)
            pi32Rebase[0] = thisOffset % 16;
    }

    return psType;
}
|
||||
|
||||
int ShaderInfo::GetShaderVarFromOffset(const uint32_t ui32Vec4Offset,
|
||||
const uint32_t(&pui32Swizzle)[4],
|
||||
const ConstantBuffer* psCBuf,
|
||||
const ShaderVarType** ppsShaderVar, // Output the found var
|
||||
bool* isArray, // Output bool that tells if the found var is an array
|
||||
std::vector<uint32_t>* arrayIndices, // Output vector of array indices in order from root parent to the found var
|
||||
int32_t* pi32Rebase, // Output swizzle rebase
|
||||
uint32_t flags)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
uint32_t ui32ByteOffset = ui32Vec4Offset * 16;
|
||||
|
||||
//Swizzle can point to another variable. In the example below
|
||||
//cbUIUpdates.g_uMaxFaces would be cb1[2].z. The scalars are combined
|
||||
//into vectors. psCBuf->ui32NumVars will be 3.
|
||||
|
||||
// cbuffer cbUIUpdates
|
||||
// {
|
||||
// float g_fLifeSpan; // Offset: 0 Size: 4
|
||||
// float g_fLifeSpanVar; // Offset: 4 Size: 4 [unused]
|
||||
// float g_fRadiusMin; // Offset: 8 Size: 4 [unused]
|
||||
// float g_fRadiusMax; // Offset: 12 Size: 4 [unused]
|
||||
// float g_fGrowTime; // Offset: 16 Size: 4 [unused]
|
||||
// float g_fStepSize; // Offset: 20 Size: 4
|
||||
// float g_fTurnRate; // Offset: 24 Size: 4
|
||||
// float g_fTurnSpeed; // Offset: 28 Size: 4 [unused]
|
||||
// float g_fLeafRate; // Offset: 32 Size: 4
|
||||
// float g_fShrinkTime; // Offset: 36 Size: 4 [unused]
|
||||
// uint g_uMaxFaces; // Offset: 40 Size: 4
|
||||
// }
|
||||
if (pui32Swizzle[0] == OPERAND_4_COMPONENT_Y)
|
||||
{
|
||||
ui32ByteOffset += 4;
|
||||
}
|
||||
else if (pui32Swizzle[0] == OPERAND_4_COMPONENT_Z)
|
||||
{
|
||||
ui32ByteOffset += 8;
|
||||
}
|
||||
else if (pui32Swizzle[0] == OPERAND_4_COMPONENT_W)
|
||||
{
|
||||
ui32ByteOffset += 12;
|
||||
}
|
||||
|
||||
const size_t ui32NumVars = psCBuf->asVars.size();
|
||||
|
||||
for (i = 0; i < ui32NumVars; ++i)
|
||||
{
|
||||
ppsShaderVar[0] = IsOffsetInType(&psCBuf->asVars[i].sType, psCBuf->asVars[i].ui32StartOffset, ui32ByteOffset, isArray, arrayIndices, pi32Rebase, flags);
|
||||
|
||||
if (ppsShaderVar[0] != NULL)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Patches the fullName of the var with given array indices. Does not insert the indexing for the var itself if it is an array.
|
||||
// Searches for brackets and inserts indices one by one.
|
||||
std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector<uint32_t> &indices)
|
||||
{
|
||||
std::ostringstream oss;
|
||||
size_t prevpos = 0;
|
||||
size_t pos = psShaderVar->fullName.find('[', 0);
|
||||
uint32_t i = 0;
|
||||
while (pos != std::string::npos)
|
||||
{
|
||||
pos++;
|
||||
oss << psShaderVar->fullName.substr(prevpos, pos - prevpos);
|
||||
if (i < indices.size())
|
||||
oss << indices[i];
|
||||
prevpos = pos;
|
||||
i++;
|
||||
pos = psShaderVar->fullName.find('[', prevpos);
|
||||
}
|
||||
oss << psShaderVar->fullName.substr(prevpos);
|
||||
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
// Maps a resource type to the resource group it is tracked under.
// RTYPE_TBUFFER and unrecognised types trip an assertion; they then fall back to
// RGROUP_TEXTURE and RGROUP_CBUFFER respectively.
ResourceGroup ShaderInfo::ResourceTypeToResourceGroup(ResourceType eType)
{
    if (eType == RTYPE_CBUFFER)
        return RGROUP_CBUFFER;

    if (eType == RTYPE_SAMPLER)
        return RGROUP_SAMPLER;

    const bool isTextureGroupType =
        eType == RTYPE_TEXTURE ||
        eType == RTYPE_BYTEADDRESS ||
        eType == RTYPE_STRUCTURED;
    if (isTextureGroupType)
        return RGROUP_TEXTURE;

    const bool isUavGroupType =
        eType == RTYPE_UAV_RWTYPED ||
        eType == RTYPE_UAV_RWSTRUCTURED ||
        eType == RTYPE_UAV_RWBYTEADDRESS ||
        eType == RTYPE_UAV_APPEND_STRUCTURED ||
        eType == RTYPE_UAV_CONSUME_STRUCTURED ||
        eType == RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER;
    if (isUavGroupType)
        return RGROUP_UAV;

    if (eType == RTYPE_TBUFFER)
    {
        ASSERT(0); // Need to find out which group this belongs to
        return RGROUP_TEXTURE;
    }

    ASSERT(0);
    return RGROUP_CBUFFER;
}
|
||||
|
||||
// Applies the precisions listed in 'info' (keyed by name) to the sampler and texture
// resource bindings. A binding is matched by its exact name first; if that fails and the
// name starts with "sampler", the name without that prefix is tried as well.
void ShaderInfo::AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info)
{
    if (info.empty())
        return;

    for (size_t idx = 0; idx < psResourceBindings.size(); ++idx)
    {
        ResourceBinding &binding = psResourceBindings[idx];

        const bool isSamplerOrTexture = (binding.eType == RTYPE_SAMPLER) || (binding.eType == RTYPE_TEXTURE);
        if (!isSamplerOrTexture)
            continue;

        // Try finding exact match
        HLSLccSamplerPrecisionInfo::iterator match = info.find(binding.name);

        // If match not found, check if name has "sampler" prefix
        // -> try finding a match without the prefix (DX11 style sampler case)
        if (match == info.end() && binding.name.compare(0, 7, "sampler") == 0)
            match = info.find(binding.name.substr(7));

        if (match == info.end())
            continue;

        binding.ePrecision = match->second;
    }
}
|
887
src/UseDefineChains.cpp
Normal file
887
src/UseDefineChains.cpp
Normal file
@ -0,0 +1,887 @@
|
||||
|
||||
#include "internal_includes/UseDefineChains.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/Instruction.h"
|
||||
|
||||
#include "internal_includes/ControlFlowGraph.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/HLSLccToolkit.h"
|
||||
#include <algorithm>
|
||||
|
||||
using HLSLcc::ForEachOperand;
|
||||
|
||||
#define DEBUG_UDCHAINS 0
|
||||
|
||||
#if DEBUG_UDCHAINS
|
||||
// Debug mode
|
||||
static void UDCheckConsistencyDUChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions)
|
||||
{
|
||||
DefineUseChain::iterator du = psDUChains[idx].begin();
|
||||
UseDefineChain::iterator ud = psUDChains[idx].begin();
|
||||
while (du != psDUChains[idx].end())
|
||||
{
|
||||
ASSERT(du->index == idx % 4);
|
||||
// Check that the definition actually writes to idx
|
||||
{
|
||||
uint32_t tempReg = idx / 4;
|
||||
uint32_t offs = idx - (tempReg * 4);
|
||||
uint32_t accessMask = 1 << offs;
|
||||
uint32_t i;
|
||||
int found = 0;
|
||||
for (i = 0; i < du->psInst->ui32FirstSrc; i++)
|
||||
{
|
||||
if (du->psInst->asOperands[i].eType == OPERAND_TYPE_TEMP)
|
||||
{
|
||||
if (du->psInst->asOperands[i].ui32RegisterNumber == tempReg)
|
||||
{
|
||||
uint32_t writeMask = GetOperandWriteMask(&du->psInst->asOperands[i]);
|
||||
if (writeMask & accessMask)
|
||||
{
|
||||
ASSERT(writeMask == du->writeMask);
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ASSERT(found);
|
||||
}
|
||||
|
||||
// Check that each usage of each definition also is found in the use-define chain
|
||||
UsageSet::iterator ul = du->usages.begin();
|
||||
while (ul != du->usages.end())
|
||||
{
|
||||
// Search for the usage in the chain
|
||||
UseDefineChain::iterator use = ud;
|
||||
while (use != psUDChains[idx].end() && &*use != *ul)
|
||||
use++;
|
||||
ASSERT(use != psUDChains[idx].end());
|
||||
ASSERT(&*use == *ul);
|
||||
|
||||
// Check that the mapping back is also found
|
||||
ASSERT(std::find(use->defines.begin(), use->defines.end(), &*du) != use->defines.end());
|
||||
|
||||
ul++;
|
||||
}
|
||||
|
||||
du++;
|
||||
}
|
||||
}
|
||||
|
||||
static void UDCheckConsistencyUDChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions)
|
||||
{
|
||||
DefineUseChain::iterator du = psDUChains[idx].begin();
|
||||
UseDefineChain::iterator ud = psUDChains[idx].begin();
|
||||
while (ud != psUDChains[idx].end())
|
||||
{
|
||||
// Check that each definition of each usage also is found in the define-use chain
|
||||
DefineSet::iterator dl = ud->defines.begin();
|
||||
ASSERT(ud->psOp->ui32RegisterNumber == idx / 4);
|
||||
ASSERT(ud->index == idx % 4);
|
||||
while (dl != ud->defines.end())
|
||||
{
|
||||
// Search for the definition in the chain
|
||||
DefineUseChain::iterator def = du;
|
||||
while (def != psDUChains[idx].end() && &*def != *dl)
|
||||
def++;
|
||||
ASSERT(def != psDUChains[idx].end());
|
||||
ASSERT(&*def == *dl);
|
||||
|
||||
// Check that the mapping back is also found
|
||||
ASSERT(std::find(def->usages.begin(), def->usages.end(), &*ud) != def->usages.end());
|
||||
|
||||
dl++;
|
||||
}
|
||||
ud++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void UDCheckConsistency(uint32_t tempRegs, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions)
|
||||
{
|
||||
uint32_t i;
|
||||
for (i = 0; i < tempRegs * 4; i++)
|
||||
{
|
||||
UDCheckConsistencyDUChain(i, psDUChains, psUDChains, activeDefinitions);
|
||||
UDCheckConsistencyUDChain(i, psDUChains, psUDChains, activeDefinitions);
|
||||
}
|
||||
}
|
||||
|
||||
#define printf_console printf
|
||||
|
||||
#endif
|
||||
|
||||
using namespace HLSLcc::ControlFlow;
|
||||
using std::for_each;
|
||||
|
||||
// Returns the chain entry for the given definition (instruction + operand pair).
// An existing entry is reused; otherwise a new one is pushed to the front of the chain
// and initialised for component 'index' with the operand's access mask as write mask.
static DefineUseChainEntry *GetOrCreateDefinition(const BasicBlock::Definition &def, DefineUseChain &psDUChain, uint32_t index)
{
    // Try to find an existing entry
    for (DefineUseChain::iterator existing = psDUChain.begin(); existing != psDUChain.end(); ++existing)
    {
        if (existing->psInst == def.m_Instruction && existing->psOp == def.m_Operand)
            return &(*existing);
    }

    // Not found, create
    psDUChain.push_front(DefineUseChainEntry());
    DefineUseChainEntry &created = psDUChain.front();

    // The chain entries hold non-const pointers, the definition holds const ones
    created.psInst = const_cast<Instruction *>(def.m_Instruction);
    created.psOp = const_cast<Operand *>(def.m_Operand);
    created.index = index;
    created.writeMask = def.m_Operand->GetAccessMask();
    created.psSiblings[index] = &created;

    return &created;
}
|
||||
|
||||
|
||||
|
||||
// Do flow control analysis on the instructions and build the define-use and use-define chains
|
||||
void BuildUseDefineChains(std::vector<Instruction> &instructions, uint32_t ui32NumTemps, DefineUseChains &psDUChain, UseDefineChains &psUDChain, HLSLcc::ControlFlow::ControlFlowGraph &cfg)
|
||||
{
|
||||
|
||||
Instruction *psFirstInstruction = &instructions[0];
|
||||
Instruction *psLastInstruction = &instructions[instructions.size() - 1];
|
||||
|
||||
ActiveDefinitions lastSeenDefinitions(ui32NumTemps * 4, NULL); // Array of pointers to the currently active definition for each temp
|
||||
|
||||
psDUChain.clear();
|
||||
psUDChain.clear();
|
||||
|
||||
for (uint32_t i = 0; i < ui32NumTemps * 4; i++)
|
||||
{
|
||||
psUDChain.insert(std::make_pair(i, UseDefineChain()));
|
||||
psDUChain.insert(std::make_pair(i, DefineUseChain()));
|
||||
}
|
||||
|
||||
const ControlFlowGraph::BasicBlockStorage &blocks = cfg.AllBlocks();
|
||||
|
||||
// Loop through each block, first calculate the union of all the reachables of all preceding blocks
|
||||
// and then build on that as we go along the basic block instructions
|
||||
for_each(blocks.begin(), blocks.end(), [&](const HLSLcc::shared_ptr<BasicBlock> &bptr)
|
||||
{
|
||||
const BasicBlock &b = *bptr.get();
|
||||
BasicBlock::ReachableVariables rvars;
|
||||
for_each(b.Preceding().begin(), b.Preceding().end(), [&](const Instruction *precBlock)
|
||||
{
|
||||
const BasicBlock &b = *cfg.GetBasicBlockForInstruction(precBlock);
|
||||
BasicBlock::RVarUnion(rvars, b.Reachable());
|
||||
});
|
||||
|
||||
// Now we have a Reachable set for the beginning of this block in rvars. Loop through all instructions and their operands and pick up uses and definitions
|
||||
for (const Instruction *inst = b.First(); inst <= b.Last(); inst++)
|
||||
{
|
||||
// Process sources first
|
||||
ForEachOperand(inst, inst+1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND,
|
||||
[&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
|
||||
{
|
||||
if (psOperand->eType != OPERAND_TYPE_TEMP)
|
||||
return;
|
||||
|
||||
uint32_t tempReg = psOperand->ui32RegisterNumber;
|
||||
uint32_t accessMask = psOperand->GetAccessMask();
|
||||
|
||||
// Go through each component
|
||||
for (int k = 0; k < 4; k++)
|
||||
{
|
||||
if (!(accessMask & (1 << k)))
|
||||
continue;
|
||||
|
||||
uint32_t regIdx = tempReg * 4 + k;
|
||||
|
||||
// Add an use for all visible definitions
|
||||
psUDChain[regIdx].push_front(UseDefineChainEntry());
|
||||
UseDefineChainEntry &ue = *psUDChain[regIdx].begin();
|
||||
ue.psInst = (Instruction *)psInst;
|
||||
ue.psOp = (Operand *)psOperand;
|
||||
ue.accessMask = accessMask;
|
||||
ue.index = k;
|
||||
ue.psSiblings[k] = &ue;
|
||||
// ue.siblings will be filled out later.
|
||||
|
||||
BasicBlock::ReachableDefinitionsPerVariable& rpv = rvars[regIdx];
|
||||
for_each(rpv.begin(), rpv.end(), [&](const BasicBlock::Definition &def)
|
||||
{
|
||||
DefineUseChainEntry *duentry = GetOrCreateDefinition(def, psDUChain[regIdx], k);
|
||||
ue.defines.insert(duentry);
|
||||
duentry->usages.insert(&ue);
|
||||
});
|
||||
}
|
||||
return;
|
||||
});
|
||||
|
||||
// Then the destination operands
|
||||
ForEachOperand(inst, inst+1, FEO_FLAG_DEST_OPERAND,
|
||||
[&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
|
||||
{
|
||||
if (psOperand->eType != OPERAND_TYPE_TEMP)
|
||||
return;
|
||||
|
||||
uint32_t tempReg = psOperand->ui32RegisterNumber;
|
||||
uint32_t accessMask = psOperand->GetAccessMask();
|
||||
|
||||
// Go through each component
|
||||
for (int k = 0; k < 4; k++)
|
||||
{
|
||||
if (!(accessMask & (1 << k)))
|
||||
continue;
|
||||
|
||||
uint32_t regIdx = tempReg * 4 + k;
|
||||
|
||||
// Overwrite whatever's in rvars; they are killed by this
|
||||
rvars[regIdx].clear();
|
||||
rvars[regIdx].insert(BasicBlock::Definition(psInst, psOperand));
|
||||
|
||||
// Make sure the definition gets created even though it doesn't have any uses at all
|
||||
// (happens when sampling a texture but not all channels are used etc).
|
||||
GetOrCreateDefinition(BasicBlock::Definition(psInst, psOperand), psDUChain[regIdx], k);
|
||||
|
||||
}
|
||||
return;
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// Connect the siblings for all uses and definitions
|
||||
for_each(psUDChain.begin(), psUDChain.end(), [&](std::pair<const uint32_t, UseDefineChain> &udpair)
|
||||
{
|
||||
UseDefineChain &ud = udpair.second;
|
||||
// Clear out the bottom 2 bits to get the actual base reg
|
||||
uint32_t baseReg = udpair.first & ~(3);
|
||||
|
||||
for_each(ud.begin(), ud.end(), [&](UseDefineChainEntry &ue)
|
||||
{
|
||||
ASSERT(baseReg / 4 == ue.psOp->ui32RegisterNumber);
|
||||
|
||||
// Go through each component
|
||||
for (int k = 0; k < 4; k++)
|
||||
{
|
||||
// Skip components that we don't access, or the one that's our own
|
||||
if (!(ue.accessMask & (1 << k)) || ue.index == k)
|
||||
continue;
|
||||
|
||||
// Find the corresponding sibling. We can uniquely identify it by the operand pointer alone.
|
||||
UseDefineChain::iterator siblItr = std::find_if(psUDChain[baseReg + k].begin(), psUDChain[baseReg + k].end(), [&](const UseDefineChainEntry &_sibl) -> bool { return _sibl.psOp == ue.psOp; });
|
||||
ASSERT(siblItr != psUDChain[baseReg + k].end());
|
||||
UseDefineChainEntry &sibling = *siblItr;
|
||||
ue.psSiblings[k] = &sibling;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// Same for definitions
|
||||
for_each(psDUChain.begin(), psDUChain.end(), [&](std::pair<const uint32_t, DefineUseChain> &dupair)
|
||||
{
|
||||
DefineUseChain &du = dupair.second;
|
||||
// Clear out the bottom 2 bits to get the actual base reg
|
||||
uint32_t baseReg = dupair.first & ~(3);
|
||||
|
||||
for_each(du.begin(), du.end(), [&](DefineUseChainEntry &de)
|
||||
{
|
||||
ASSERT(baseReg / 4 == de.psOp->ui32RegisterNumber);
|
||||
|
||||
// Go through each component
|
||||
for (int k = 0; k < 4; k++)
|
||||
{
|
||||
// Skip components that we don't access, or the one that's our own
|
||||
if (!(de.writeMask & (1 << k)) || de.index == k)
|
||||
continue;
|
||||
|
||||
// Find the corresponding sibling. We can uniquely identify it by the operand pointer alone.
|
||||
DefineUseChain::iterator siblItr = std::find_if(psDUChain[baseReg + k].begin(), psDUChain[baseReg + k].end(), [&](const DefineUseChainEntry &_sibl) -> bool { return _sibl.psOp == de.psOp; });
|
||||
ASSERT(siblItr != psDUChain[baseReg + k].end());
|
||||
DefineUseChainEntry &sibling = *siblItr;
|
||||
de.psSiblings[k] = &sibling;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
#if DEBUG_UDCHAINS
|
||||
UDCheckConsistency(ui32NumTemps, psDUChain, psUDChain, lastSeenDefinitions);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// List of definition entries that are considered for splitting as one group
using SplitDefinitions = std::vector<DefineUseChainEntry *>;
|
||||
|
||||
// Split out a define to use a new temp register
|
||||
static void UDDoSplit(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable)
|
||||
{
|
||||
uint32_t newReg = *psNumTemps;
|
||||
uint32_t oldReg = defs[0]->psOp->ui32RegisterNumber;
|
||||
uint32_t accessMask = defs[0]->writeMask;
|
||||
uint32_t i, u32def;
|
||||
uint32_t rebase, count;
|
||||
uint32_t splitTableValue;
|
||||
|
||||
ASSERT(defs.size() > 0);
|
||||
for (i = 1; i < defs.size(); i++)
|
||||
{
|
||||
ASSERT(defs[i]->psOp->ui32RegisterNumber == oldReg);
|
||||
accessMask |= defs[i]->writeMask;
|
||||
}
|
||||
|
||||
|
||||
(*psNumTemps)++;
|
||||
|
||||
|
||||
#if DEBUG_UDCHAINS
|
||||
UDCheckConsistency((*psNumTemps) - 1, psDUChains, psUDChains, ActiveDefinitions());
|
||||
#endif
|
||||
ASSERT(accessMask != 0 && accessMask <= 0xf);
|
||||
// Calculate rebase value and component count
|
||||
rebase = 0;
|
||||
count = 0;
|
||||
i = accessMask;
|
||||
while ((i & 1) == 0)
|
||||
{
|
||||
rebase++;
|
||||
i = i >> 1;
|
||||
}
|
||||
while (i != 0)
|
||||
{
|
||||
count++;
|
||||
i = i >> 1;
|
||||
}
|
||||
|
||||
// Make sure there's enough room in the split table
|
||||
if (pui32SplitTable.size() <= newReg)
|
||||
{
|
||||
size_t newSize = pui32SplitTable.size() * 2;
|
||||
pui32SplitTable.resize(newSize, 0xffffffff);
|
||||
}
|
||||
|
||||
// Set the original temp of the new register
|
||||
{
|
||||
uint32_t origTemp = oldReg;
|
||||
while (pui32SplitTable[origTemp] != 0xffffffff)
|
||||
origTemp = pui32SplitTable[origTemp] & 0xffff;
|
||||
|
||||
ASSERT(rebase < 4);
|
||||
ASSERT(count <= 4);
|
||||
splitTableValue = (count << 24) | (rebase << 16) | origTemp;
|
||||
|
||||
pui32SplitTable[newReg] = splitTableValue;
|
||||
}
|
||||
|
||||
// Insert the new temps to the map
|
||||
for (i = newReg * 4; i < newReg * 4 + 4; i++)
|
||||
{
|
||||
psUDChains.insert(std::make_pair(i, UseDefineChain()));
|
||||
psDUChains.insert(std::make_pair(i, DefineUseChain()));
|
||||
}
|
||||
|
||||
for (u32def = 0; u32def < defs.size(); u32def++)
|
||||
{
|
||||
DefineUseChainEntry *defineToSplit = defs[u32def];
|
||||
uint32_t oldIdx = defineToSplit->index;
|
||||
#if DEBUG_UDCHAINS
|
||||
printf("Split def at instruction %d (reg %d -> %d, access %X, rebase %d, count: %d)\n", (int)defineToSplit->psInst->id, oldReg, newReg, accessMask, rebase, count);
|
||||
#endif
|
||||
|
||||
// We may have moved the opcodes already because of multiple defines pointing to the same op
|
||||
if (defineToSplit->psOp->ui32RegisterNumber != newReg)
|
||||
{
|
||||
ASSERT(defineToSplit->psOp->ui32RegisterNumber == oldReg);
|
||||
// Update the declaration operand
|
||||
// Don't change possible suboperands as they are sources
|
||||
defineToSplit->psInst->ChangeOperandTempRegister(defineToSplit->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase);
|
||||
}
|
||||
|
||||
defineToSplit->writeMask >>= rebase;
|
||||
defineToSplit->index -= rebase;
|
||||
// Change the temp register number for all usages
|
||||
UsageSet::iterator ul = defineToSplit->usages.begin();
|
||||
while (ul != defineToSplit->usages.end())
|
||||
{
|
||||
// Already updated by one of the siblings? Skip.
|
||||
if ((*ul)->psOp->ui32RegisterNumber != newReg)
|
||||
{
|
||||
ASSERT((*ul)->psOp->ui32RegisterNumber == oldReg);
|
||||
(*ul)->psInst->ChangeOperandTempRegister((*ul)->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase);
|
||||
}
|
||||
|
||||
// Update the UD chain
|
||||
{
|
||||
UseDefineChain::iterator udLoc = psUDChains[oldReg * 4 + oldIdx].begin();
|
||||
while (udLoc != psUDChains[oldReg * 4 + oldIdx].end())
|
||||
{
|
||||
if (&*udLoc == *ul)
|
||||
{
|
||||
// Move to new list
|
||||
psUDChains[newReg * 4 + oldIdx - rebase].splice(psUDChains[newReg * 4 + oldIdx - rebase].begin(), psUDChains[oldReg * 4 + oldIdx], udLoc);
|
||||
|
||||
if (rebase > 0)
|
||||
{
|
||||
(*ul)->accessMask >>= rebase;
|
||||
(*ul)->index -= rebase;
|
||||
memmove((*ul)->psSiblings, (*ul)->psSiblings + rebase, (4 - rebase) * sizeof(UseDefineChain *));
|
||||
}
|
||||
break;
|
||||
}
|
||||
udLoc++;
|
||||
}
|
||||
}
|
||||
|
||||
ul++;
|
||||
}
|
||||
|
||||
// Move the define out of the old chain (if its still there)
|
||||
{
|
||||
// Find the define in the old chain
|
||||
DefineUseChain::iterator duLoc = psDUChains[oldReg * 4 + oldIdx].begin();
|
||||
while (duLoc != psDUChains[oldReg * 4 + oldIdx].end() && ((&*duLoc) != defineToSplit))
|
||||
{
|
||||
duLoc++;
|
||||
}
|
||||
ASSERT(duLoc != psDUChains[oldReg * 4 + oldIdx].end());
|
||||
{
|
||||
// Move directly to new chain
|
||||
psDUChains[newReg * 4 + oldIdx - rebase].splice(psDUChains[newReg * 4 + oldIdx - rebase].begin(), psDUChains[oldReg * 4 + oldIdx], duLoc);
|
||||
if (rebase != 0)
|
||||
{
|
||||
memmove(defineToSplit->psSiblings, defineToSplit->psSiblings + rebase, (4 - rebase) * sizeof(DefineUseChain *));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#if DEBUG_UDCHAINS
|
||||
UDCheckConsistency(*psNumTemps, psDUChains, psUDChains, ActiveDefinitions());
|
||||
#endif
|
||||
}
|
||||
|
||||
// Adds a define and all its siblings to the list, checking duplicates
|
||||
static void AddDefineToList(SplitDefinitions &defs, DefineUseChainEntry *newDef)
|
||||
{
|
||||
uint32_t k;
|
||||
for (k = 0; k < 4; k++)
|
||||
{
|
||||
if (newDef->psSiblings[k])
|
||||
{
|
||||
DefineUseChainEntry *defToAdd = newDef->psSiblings[k];
|
||||
uint32_t m;
|
||||
int defFound = 0;
|
||||
for (m = 0; m < defs.size(); m++)
|
||||
{
|
||||
if (defs[m] == defToAdd)
|
||||
{
|
||||
defFound = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (defFound == 0)
|
||||
{
|
||||
defs.push_back(newDef->psSiblings[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check if a set of definitions can be split and does the split. Returns nonzero if a split took place
|
||||
static int AttemptSplitDefinitions(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable)
|
||||
{
|
||||
uint32_t reg;
|
||||
uint32_t combinedMask;
|
||||
uint32_t i, k, u32def;
|
||||
int canSplit = 1;
|
||||
DefineUseChain::iterator du;
|
||||
int hasLeftoverDefinitions = 0;
|
||||
// Initial checks: all definitions must:
|
||||
// Access the same register
|
||||
// Have at least one definition in any of the 4 register slots that isn't included
|
||||
if (defs.empty())
|
||||
return 0;
|
||||
|
||||
reg = defs[0]->psOp->ui32RegisterNumber;
|
||||
combinedMask = defs[0]->writeMask;
|
||||
for (i = 1; i < defs.size(); i++)
|
||||
{
|
||||
if (reg != defs[i]->psOp->ui32RegisterNumber)
|
||||
return 0;
|
||||
|
||||
combinedMask |= defs[i]->writeMask;
|
||||
}
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
du = psDUChains[reg * 4 + i].begin();
|
||||
while (du != psDUChains[reg * 4 + i].end())
|
||||
{
|
||||
int defFound = 0;
|
||||
for (k = 0; k < defs.size(); k++)
|
||||
{
|
||||
if (&*du == defs[k])
|
||||
{
|
||||
defFound = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (defFound == 0)
|
||||
{
|
||||
hasLeftoverDefinitions = 1;
|
||||
break;
|
||||
}
|
||||
du++;
|
||||
}
|
||||
if (hasLeftoverDefinitions)
|
||||
break;
|
||||
}
|
||||
// We'd be splitting the entire register and all its definitions, no point in that.
|
||||
if (hasLeftoverDefinitions == 0)
|
||||
return 0;
|
||||
|
||||
// Check all the definitions. Any of them must not have any usages that see any definitions not in our defs array.
|
||||
for (u32def = 0; u32def < defs.size(); u32def++)
|
||||
{
|
||||
DefineUseChainEntry *def = defs[u32def];
|
||||
|
||||
UsageSet::iterator ul = def->usages.begin();
|
||||
while (ul != def->usages.end())
|
||||
{
|
||||
uint32_t j;
|
||||
|
||||
// Check that we only read a subset of the combined writemask
|
||||
if (((*ul)->accessMask & (~combinedMask)) != 0)
|
||||
{
|
||||
// Do an additional attempt, pick up all the sibling definitions as well
|
||||
// Only do this if we have the space in the definitions table
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
if (((*ul)->accessMask & (1 << j)) == 0)
|
||||
continue;
|
||||
AddDefineToList(defs, *(*ul)->psSiblings[j]->defines.begin());
|
||||
}
|
||||
return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable);
|
||||
|
||||
}
|
||||
|
||||
// It must have at least one declaration
|
||||
ASSERT(!(*ul)->defines.empty());
|
||||
|
||||
// Check that all siblings for the usage use one of the definitions
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
uint32_t m;
|
||||
int defineFound = 0;
|
||||
if (((*ul)->accessMask & (1 << j)) == 0)
|
||||
continue;
|
||||
|
||||
ASSERT((*ul)->psSiblings[j] != NULL);
|
||||
ASSERT(!(*ul)->psSiblings[j]->defines.empty());
|
||||
|
||||
// Check that all definitions for this usage are found from the definitions table
|
||||
DefineSet::iterator dl = (*ul)->psSiblings[j]->defines.begin();
|
||||
while (dl != (*ul)->psSiblings[j]->defines.end())
|
||||
{
|
||||
defineFound = 0;
|
||||
for (m = 0; m < defs.size(); m++)
|
||||
{
|
||||
if (*dl == defs[m])
|
||||
{
|
||||
defineFound = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (defineFound == 0)
|
||||
{
|
||||
// Add this define and all its siblings to the table and try again
|
||||
AddDefineToList(defs, *dl);
|
||||
return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable);
|
||||
canSplit = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
dl++;
|
||||
}
|
||||
|
||||
if (defineFound == 0)
|
||||
{
|
||||
canSplit = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (canSplit == 0)
|
||||
break;
|
||||
|
||||
// This'll do, check next usage
|
||||
ul++;
|
||||
}
|
||||
if (canSplit == 0)
|
||||
break;
|
||||
|
||||
}
|
||||
if (canSplit)
|
||||
{
|
||||
UDDoSplit(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Do temp splitting based on use-define chains
|
||||
void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable)
|
||||
{
|
||||
// Algorithm overview:
|
||||
// Take each definition and look at all its usages. If all usages only see this definition (and this is not the only definition for this variable),
|
||||
// split it out.
|
||||
uint32_t i;
|
||||
uint32_t tempsAtStart = *psNumTemps; // We don't need to try to analyze the newly created ones, they're unsplittable by definition
|
||||
for (i = 0; i < tempsAtStart * 4; i++)
|
||||
{
|
||||
// No definitions?
|
||||
if (psDUChains[i].empty())
|
||||
continue;
|
||||
|
||||
DefineUseChain::iterator du = psDUChains[i].begin();
|
||||
// Ok we have multiple definitions for a temp, check them through
|
||||
while (du != psDUChains[i].end())
|
||||
{
|
||||
SplitDefinitions sd;
|
||||
AddDefineToList(sd, &*du);
|
||||
du++;
|
||||
// If we split, we'll have to start from the beginning of this chain because du might no longer be in this chain
|
||||
if (AttemptSplitDefinitions(sd, psNumTemps, psDUChains, psUDChains, pui32SplitTable))
|
||||
{
|
||||
du = psDUChains[i].begin();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns nonzero if all the operands have partial precision and at least one of them has been downgraded as part of shader downgrading process.
|
||||
// Sampler ops, bitwise ops and comparisons are ignored.
|
||||
static int CanDowngradeDefinitionPrecision(DefineUseChain::iterator du, OPERAND_MIN_PRECISION *pType)
|
||||
{
|
||||
Instruction *psInst = du->psInst;
|
||||
int hasFullPrecOperands = 0;
|
||||
uint32_t i;
|
||||
|
||||
if (du->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT)
|
||||
return 0;
|
||||
|
||||
switch (psInst->eOpcode)
|
||||
{
|
||||
case OPCODE_ADD:
|
||||
case OPCODE_MUL:
|
||||
case OPCODE_MOV:
|
||||
case OPCODE_MAD:
|
||||
case OPCODE_DIV:
|
||||
case OPCODE_LOG:
|
||||
case OPCODE_EXP:
|
||||
case OPCODE_MAX:
|
||||
case OPCODE_MIN:
|
||||
case OPCODE_DP2:
|
||||
case OPCODE_DP2ADD:
|
||||
case OPCODE_DP3:
|
||||
case OPCODE_DP4:
|
||||
case OPCODE_RSQ:
|
||||
case OPCODE_SQRT:
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (i = psInst->ui32FirstSrc; i < psInst->ui32NumOperands; i++)
|
||||
{
|
||||
Operand *op = &psInst->asOperands[i];
|
||||
if (op->eType == OPERAND_TYPE_IMMEDIATE32)
|
||||
continue; // Immediate values are ignored
|
||||
|
||||
if (op->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT)
|
||||
{
|
||||
hasFullPrecOperands = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (hasFullPrecOperands)
|
||||
return 0;
|
||||
|
||||
if (pType)
|
||||
*pType = OPERAND_MIN_PRECISION_FLOAT_16; // Don't go lower than mediump
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Returns true if all the usages of this definitions are instructions that deal with floating point data
|
||||
static bool HasOnlyFloatUsages(DefineUseChain::iterator du)
|
||||
{
|
||||
UsageSet::iterator itr = du->usages.begin();
|
||||
for (; itr != du->usages.end(); itr++)
|
||||
{
|
||||
Instruction *psInst = (*itr)->psInst;
|
||||
|
||||
if ((*itr)->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT)
|
||||
return false;
|
||||
|
||||
switch (psInst->eOpcode)
|
||||
{
|
||||
case OPCODE_ADD:
|
||||
case OPCODE_MUL:
|
||||
case OPCODE_MOV:
|
||||
case OPCODE_MAD:
|
||||
case OPCODE_DIV:
|
||||
case OPCODE_LOG:
|
||||
case OPCODE_EXP:
|
||||
case OPCODE_MAX:
|
||||
case OPCODE_MIN:
|
||||
case OPCODE_DP2:
|
||||
case OPCODE_DP2ADD:
|
||||
case OPCODE_DP3:
|
||||
case OPCODE_DP4:
|
||||
case OPCODE_RSQ:
|
||||
case OPCODE_SQRT:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Based on the sampler precisions, downgrade the definitions if possible.
|
||||
void UpdateSamplerPrecisions(const ShaderInfo &info, DefineUseChains &psDUChains, uint32_t ui32NumTemps)
|
||||
{
|
||||
uint32_t madeProgress = 0;
|
||||
do
|
||||
{
|
||||
uint32_t i;
|
||||
madeProgress = 0;
|
||||
for (i = 0; i < ui32NumTemps * 4; i++)
|
||||
{
|
||||
DefineUseChain::iterator du = psDUChains[i].begin();
|
||||
while (du != psDUChains[i].end())
|
||||
{
|
||||
OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT;
|
||||
if ((du->psInst->IsPartialPrecisionSamplerInstruction(info, &sType)
|
||||
|| CanDowngradeDefinitionPrecision(du, &sType))
|
||||
&& du->psInst->asOperands[0].eType == OPERAND_TYPE_TEMP
|
||||
&& du->psInst->asOperands[0].eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT
|
||||
&& du->isStandalone
|
||||
&& HasOnlyFloatUsages(du))
|
||||
{
|
||||
uint32_t sibl;
|
||||
// Ok we can change the precision.
|
||||
ASSERT(du->psOp->eType == OPERAND_TYPE_TEMP);
|
||||
ASSERT(sType != OPERAND_MIN_PRECISION_DEFAULT);
|
||||
du->psOp->eMinPrecision = sType;
|
||||
|
||||
// Update all the uses of all the siblings
|
||||
for (sibl = 0; sibl < 4; sibl++)
|
||||
{
|
||||
if (!du->psSiblings[sibl])
|
||||
continue;
|
||||
|
||||
UsageSet::iterator ul = du->psSiblings[sibl]->usages.begin();
|
||||
while (ul != du->psSiblings[sibl]->usages.end())
|
||||
{
|
||||
ASSERT((*ul)->psOp->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT ||
|
||||
(*ul)->psOp->eMinPrecision == sType);
|
||||
// We may well write this multiple times to the same op but that's fine.
|
||||
(*ul)->psOp->eMinPrecision = sType;
|
||||
|
||||
ul++;
|
||||
}
|
||||
}
|
||||
madeProgress = 1;
|
||||
}
|
||||
du++;
|
||||
}
|
||||
}
|
||||
} while (madeProgress != 0);
|
||||
|
||||
}
|
||||
|
||||
// Marks definitions as standalone.
//
// A definition is marked when, for every usage of every one of its siblings, each sibling of that
// usage sees exactly one definition and that definition is the corresponding sibling of this
// definition. When that holds, isStandalone is set on all siblings of the definition.
// Definitions that are already marked are left untouched.
void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32NumTemps)
{
    for (uint32_t slot = 0; slot < ui32NumTemps * 4; ++slot)
    {
        for (DefineUseChain::iterator du = psDUChains[slot].begin(); du != psDUChains[slot].end(); ++du)
        {
            if (du->isStandalone)
                continue;

            bool standalone = true;

            for (uint32_t defComp = 0; defComp < 4 && standalone; ++defComp)
            {
                if (!du->psSiblings[defComp])
                    continue;

                for (UsageSet::iterator use = du->psSiblings[defComp]->usages.begin();
                     standalone && use != du->psSiblings[defComp]->usages.end(); ++use)
                {
                    ASSERT(!(*use)->defines.empty());

                    // Need to check that all the siblings of this usage only see this definition's corresponding sibling
                    for (uint32_t useComp = 0; useComp < 4; ++useComp)
                    {
                        if (!(*use)->psSiblings[useComp])
                            continue;

                        const bool seesOnlyOurSibling =
                            (*use)->psSiblings[useComp]->defines.size() <= 1
                            && *(*use)->psSiblings[useComp]->defines.begin() == du->psSiblings[useComp];
                        if (!seesOnlyOurSibling)
                        {
                            standalone = false;
                            break;
                        }
                    }
                }
            }

            if (!standalone)
                continue;

            // Yep, mark it (and all of its siblings)
            for (uint32_t defComp = 0; defComp < 4; ++defComp)
            {
                if (du->psSiblings[defComp])
                    du->psSiblings[defComp]->isStandalone = 1;
            }
        }
    }
}
|
||||
|
||||
// Write the uses and defines back to Instruction and Operand member lists.
|
||||
void WriteBackUsesAndDefines(DefineUseChains &psDUChains)
|
||||
{
|
||||
using namespace std;
|
||||
// Loop through the whole data structure, and write usages and defines to Instructions and Operands as we see them
|
||||
for_each(psDUChains.begin(), psDUChains.end(), [](const DefineUseChains::value_type &itr)
|
||||
{
|
||||
const DefineUseChain &duChain = itr.second;
|
||||
for_each(duChain.begin(), duChain.end(), [](const DefineUseChain::value_type &du)
|
||||
{
|
||||
for_each(du.usages.begin(), du.usages.end(), [&du](const UseDefineChainEntry *usage)
|
||||
{
|
||||
// Update instruction use list
|
||||
du.psInst->m_Uses.push_back(Instruction::Use(usage->psInst, usage->psOp));
|
||||
// And the usage's definition
|
||||
usage->psOp->m_Defines.push_back(Operand::Define(du.psInst, du.psOp));
|
||||
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
85
src/cbstring/bsafe.c
Normal file
85
src/cbstring/bsafe.c
Normal file
@ -0,0 +1,85 @@
|
||||
/*
|
||||
* This source file is part of the bstring string library. This code was
|
||||
* written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
|
||||
* BSD open source license or GPL v2.0. Refer to the accompanying documentation
|
||||
* for details on usage and license.
|
||||
*/
|
||||
|
||||
/*
|
||||
* bsafe.c
|
||||
*
|
||||
* This is an optional module that can be used to help enforce a safety
|
||||
* standard based on pervasive usage of bstrlib. This file is not necessarily
|
||||
* portable, however, it has been tested to work correctly with Intel's C/C++
|
||||
* compiler, WATCOM C/C++ v11.x and Microsoft Visual C++.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "bsafe.h"
|
||||
|
||||
static int bsafeShouldExit = 1;
|
||||
|
||||
#if 0
/*
 * Everything up to the matching #endif is compiled out.
 *
 * Each function below replaces an unsafe C library string function with a
 * trap: it prints an error message naming the suggested bstrlib replacement
 * to stderr, calls exit (-1) when bsafeShouldExit is nonzero, and otherwise
 * returns NULL. The arguments are never used.
 */
char * strcpy (char *dst, const char *src);
char * strcat (char *dst, const char *src);

char * strcpy (char *dst, const char *src) {
    (void) dst;
    (void) src;
    fprintf (stderr, "bsafe error: strcpy() is not safe, use bstrcpy instead.\n");
    if (bsafeShouldExit) {
        exit (-1);
    }
    return NULL;
}

char * strcat (char *dst, const char *src) {
    (void) dst;
    (void) src;
    fprintf (stderr, "bsafe error: strcat() is not safe, use bstrcat instead.\n");
    if (bsafeShouldExit) {
        exit (-1);
    }
    return NULL;
}

#if !defined (__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310))
char * (gets) (char * buf) {
    (void) buf;
    fprintf (stderr, "bsafe error: gets() is not safe, use bgets.\n");
    if (bsafeShouldExit) {
        exit (-1);
    }
    return NULL;
}
#endif

char * (strncpy) (char *dst, const char *src, size_t n) {
    (void) dst;
    (void) src;
    (void) n;
    fprintf (stderr, "bsafe error: strncpy() is not safe, use bmidstr instead.\n");
    if (bsafeShouldExit) {
        exit (-1);
    }
    return NULL;
}

char * (strncat) (char *dst, const char *src, size_t n) {
    (void) dst;
    (void) src;
    (void) n;
    fprintf (stderr, "bsafe error: strncat() is not safe, use bstrcat then btrunc\n\tor cstr2tbstr, btrunc then bstrcat instead.\n");
    if (bsafeShouldExit) {
        exit (-1);
    }
    return NULL;
}

char * (strtok) (char *s1, const char *s2) {
    (void) s1;
    (void) s2;
    fprintf (stderr, "bsafe error: strtok() is not safe, use bsplit or bsplits instead.\n");
    if (bsafeShouldExit) {
        exit (-1);
    }
    return NULL;
}

char * (strdup) (const char *s) {
    (void) s;
    fprintf (stderr, "bsafe error: strdup() is not safe, use bstrcpy.\n");
    if (bsafeShouldExit) {
        exit (-1);
    }
    return NULL;
}

#endif
|
43
src/cbstring/bsafe.h
Normal file
43
src/cbstring/bsafe.h
Normal file
@ -0,0 +1,43 @@
|
||||
/*
|
||||
* This source file is part of the bstring string library. This code was
|
||||
* written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
|
||||
* BSD open source license or GPL v2.0. Refer to the accompanying documentation
|
||||
* for details on usage and license.
|
||||
*/
|
||||
|
||||
/*
|
||||
* bsafe.h
|
||||
*
|
||||
* This is an optional module that can be used to help enforce a safety
|
||||
* standard based on pervasive usage of bstrlib. This file is not necessarily
|
||||
* portable, however, it has been tested to work correctly with Intel's C/C++
|
||||
* compiler, WATCOM C/C++ v11.x and Microsoft Visual C++.
|
||||
*/
|
||||
|
||||
#ifndef BSTRLIB_BSAFE_INCLUDE
|
||||
#define BSTRLIB_BSAFE_INCLUDE
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if !defined (__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310))
|
||||
/* This is caught in the linker, so its not necessary for gcc. */
|
||||
extern char * (gets) (char * buf);
|
||||
#endif
|
||||
|
||||
extern char * (strncpy) (char *dst, const char *src, size_t n);
|
||||
extern char * (strncat) (char *dst, const char *src, size_t n);
|
||||
extern char * (strtok) (char *s1, const char *s2);
|
||||
extern char * (strdup) (const char *s);
|
||||
|
||||
#undef strcpy
|
||||
#undef strcat
|
||||
#define strcpy(a,b) bsafe_strcpy(a,b)
|
||||
#define strcat(a,b) bsafe_strcat(a,b)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
1133
src/cbstring/bstraux.c
Normal file
1133
src/cbstring/bstraux.c
Normal file
File diff suppressed because it is too large
Load Diff
112
src/cbstring/bstraux.h
Normal file
112
src/cbstring/bstraux.h
Normal file
@ -0,0 +1,112 @@
|
||||
/*
|
||||
* This source file is part of the bstring string library. This code was
|
||||
* written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
|
||||
* BSD open source license or GPL v2.0. Refer to the accompanying documentation
|
||||
* for details on usage and license.
|
||||
*/
|
||||
|
||||
/*
|
||||
* bstraux.h
|
||||
*
|
||||
* This file is not a necessary part of the core bstring library itself, but
|
||||
* is just an auxiliary module which includes miscellaneous or trivial
|
||||
* functions.
|
||||
*/
|
||||
|
||||
#ifndef BSTRAUX_INCLUDE
|
||||
#define BSTRAUX_INCLUDE
|
||||
|
||||
#include <time.h>
|
||||
#include "bstrlib.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Safety mechanisms */
|
||||
#define bstrDeclare(b) bstring (b) = NULL;
|
||||
#define bstrFree(b) {if ((b) != NULL && (b)->slen >= 0 && (b)->mlen >= (b)->slen) { bdestroy (b); (b) = NULL; }}
|
||||
|
||||
/* Backward compatibility with previous versions of Bstrlib */
|
||||
#define bAssign(a,b) ((bassign)((a), (b)))
|
||||
#define bSubs(b,pos,len,a,c) ((breplace)((b),(pos),(len),(a),(unsigned char)(c)))
|
||||
#define bStrchr(b,c) ((bstrchr)((b), (c)))
|
||||
#define bStrchrFast(b,c) ((bstrchr)((b), (c)))
|
||||
#define bCatCstr(b,s) ((bcatcstr)((b), (s)))
|
||||
#define bCatBlk(b,s,len) ((bcatblk)((b),(s),(len)))
|
||||
#define bCatStatic(b,s) bCatBlk ((b), ("" s ""), sizeof (s) - 1)
|
||||
#define bTrunc(b,n) ((btrunc)((b), (n)))
|
||||
#define bReplaceAll(b,find,repl,pos) ((bfindreplace)((b),(find),(repl),(pos)))
|
||||
#define bUppercase(b) ((btoupper)(b))
|
||||
#define bLowercase(b) ((btolower)(b))
|
||||
#define bCaselessCmp(a,b) ((bstricmp)((a), (b)))
|
||||
#define bCaselessNCmp(a,b,n) ((bstrnicmp)((a), (b), (n)))
|
||||
#define bBase64Decode(b) (bBase64DecodeEx ((b), NULL))
|
||||
#define bUuDecode(b) (bUuDecodeEx ((b), NULL))
|
||||
|
||||
/* Unusual functions */
|
||||
extern struct bStream * bsFromBstr (const_bstring b);
|
||||
extern bstring bTail (bstring b, int n);
|
||||
extern bstring bHead (bstring b, int n);
|
||||
extern int bSetCstrChar (bstring a, int pos, char c);
|
||||
extern int bSetChar (bstring b, int pos, char c);
|
||||
extern int bFill (bstring a, char c, int len);
|
||||
extern int bReplicate (bstring b, int n);
|
||||
extern int bReverse (bstring b);
|
||||
extern int bInsertChrs (bstring b, int pos, int len, unsigned char c, unsigned char fill);
|
||||
extern bstring bStrfTime (const char * fmt, const struct tm * timeptr);
|
||||
#define bAscTime(t) (bStrfTime ("%c\n", (t)))
|
||||
#define bCTime(t) ((t) ? bAscTime (localtime (t)) : NULL)
|
||||
|
||||
/* Spacing formatting */
|
||||
extern int bJustifyLeft (bstring b, int space);
|
||||
extern int bJustifyRight (bstring b, int width, int space);
|
||||
extern int bJustifyMargin (bstring b, int width, int space);
|
||||
extern int bJustifyCenter (bstring b, int width, int space);
|
||||
|
||||
/* Esoteric standards specific functions */
|
||||
extern char * bStr2NetStr (const_bstring b);
|
||||
extern bstring bNetStr2Bstr (const char * buf);
|
||||
extern bstring bBase64Encode (const_bstring b);
|
||||
extern bstring bBase64DecodeEx (const_bstring b, int * boolTruncError);
|
||||
extern struct bStream * bsUuDecode (struct bStream * sInp, int * badlines);
|
||||
extern bstring bUuDecodeEx (const_bstring src, int * badlines);
|
||||
extern bstring bUuEncode (const_bstring src);
|
||||
extern bstring bYEncode (const_bstring src);
|
||||
extern bstring bYDecode (const_bstring src);
|
||||
|
||||
/* Writable stream */
|
||||
typedef int (* bNwrite) (const void * buf, size_t elsize, size_t nelem, void * parm);
|
||||
|
||||
struct bwriteStream * bwsOpen (bNwrite writeFn, void * parm);
|
||||
int bwsWriteBstr (struct bwriteStream * stream, const_bstring b);
|
||||
int bwsWriteBlk (struct bwriteStream * stream, void * blk, int len);
|
||||
int bwsWriteFlush (struct bwriteStream * stream);
|
||||
int bwsIsEOF (const struct bwriteStream * stream);
|
||||
int bwsBuffLength (struct bwriteStream * stream, int sz);
|
||||
void * bwsClose (struct bwriteStream * stream);
|
||||
|
||||
/* Security functions */
|
||||
#define bSecureDestroy(b) { \
|
||||
bstring bstr__tmp = (b); \
|
||||
if (bstr__tmp && bstr__tmp->mlen > 0 && bstr__tmp->data) { \
|
||||
(void) memset (bstr__tmp->data, 0, (size_t) bstr__tmp->mlen); \
|
||||
bdestroy (bstr__tmp); \
|
||||
} \
|
||||
}
|
||||
/*
 * bSecureWriteProtect (t)
 *
 * t is a struct (not a pointer) with mlen, slen and data members. If
 * t.mlen is non-negative, the bytes of t.data between slen and mlen are
 * overwritten with zeros and t.mlen is set to -1. If t.mlen is already
 * negative, nothing is done.
 *
 * Note that t is evaluated several times, so it must not have side effects.
 */
#define bSecureWriteProtect(t) { \
    if ((t).mlen >= 0) { \
        if ((t).mlen > (t).slen) { \
            (void) memset ((t).data + (t).slen, 0, (size_t) (t).mlen - (t).slen); \
        } \
        (t).mlen = -1; \
    } \
}
|
||||
extern bstring bSecureInput (int maxlen, int termchar,
|
||||
bNgetc vgetchar, void * vgcCtx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
2974
src/cbstring/bstrlib.c
Normal file
2974
src/cbstring/bstrlib.c
Normal file
File diff suppressed because it is too large
Load Diff
304
src/cbstring/bstrlib.h
Normal file
304
src/cbstring/bstrlib.h
Normal file
@ -0,0 +1,304 @@
|
||||
/*
|
||||
* This source file is part of the bstring string library. This code was
|
||||
* written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
|
||||
* BSD open source license or GPL v2.0. Refer to the accompanying documentation
|
||||
* for details on usage and license.
|
||||
*/
|
||||
|
||||
/*
|
||||
* bstrlib.h
|
||||
*
|
||||
* This file is the header file for the core module for implementing the
|
||||
* bstring functions.
|
||||
*/
|
||||
|
||||
#ifndef BSTRLIB_INCLUDE
|
||||
#define BSTRLIB_INCLUDE
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP)
|
||||
# if defined (__TURBOC__) && !defined (__BORLANDC__)
|
||||
# define BSTRLIB_NOVSNP
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define BSTR_ERR (-1)
|
||||
#define BSTR_OK (0)
|
||||
#define BSTR_BS_BUFF_LENGTH_GET (0)
|
||||
|
||||
typedef struct tagbstring * bstring;
|
||||
typedef const struct tagbstring * const_bstring;
|
||||
|
||||
/* Copy functions */
|
||||
#define cstr2bstr bfromcstr
|
||||
extern bstring bfromcstr (const char * str);
|
||||
extern bstring bfromcstralloc (int mlen, const char * str);
|
||||
extern bstring blk2bstr (const void * blk, int len);
|
||||
extern char * bstr2cstr (const_bstring s, char z);
|
||||
extern int bcstrfree (char * s);
|
||||
extern bstring bstrcpy (const_bstring b1);
|
||||
extern int bassign (bstring a, const_bstring b);
|
||||
extern int bassignmidstr (bstring a, const_bstring b, int left, int len);
|
||||
extern int bassigncstr (bstring a, const char * str);
|
||||
extern int bassignblk (bstring a, const void * s, int len);
|
||||
|
||||
/* Destroy function */
|
||||
extern int bdestroy (bstring b);
|
||||
|
||||
/* Space allocation hinting functions */
|
||||
extern int balloc (bstring s, int len);
|
||||
extern int ballocmin (bstring b, int len);
|
||||
|
||||
/* Substring extraction */
|
||||
extern bstring bmidstr (const_bstring b, int left, int len);
|
||||
|
||||
/* Various standard manipulations */
|
||||
extern int bconcat (bstring b0, const_bstring b1);
|
||||
extern int bconchar (bstring b0, char c);
|
||||
extern int bcatcstr (bstring b, const char * s);
|
||||
extern int bcatblk (bstring b, const void * s, int len);
|
||||
extern int binsert (bstring s1, int pos, const_bstring s2, unsigned char fill);
|
||||
extern int binsertch (bstring s1, int pos, int len, unsigned char fill);
|
||||
extern int breplace (bstring b1, int pos, int len, const_bstring b2, unsigned char fill);
|
||||
extern int bdelete (bstring s1, int pos, int len);
|
||||
extern int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill);
|
||||
extern int btrunc (bstring b, int n);
|
||||
|
||||
/* Scan/search functions */
|
||||
extern int bstricmp (const_bstring b0, const_bstring b1);
|
||||
extern int bstrnicmp (const_bstring b0, const_bstring b1, int n);
|
||||
extern int biseqcaseless (const_bstring b0, const_bstring b1);
|
||||
extern int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len);
|
||||
extern int biseq (const_bstring b0, const_bstring b1);
|
||||
extern int bisstemeqblk (const_bstring b0, const void * blk, int len);
|
||||
extern int biseqcstr (const_bstring b, const char * s);
|
||||
extern int biseqcstrcaseless (const_bstring b, const char * s);
|
||||
extern int bstrcmp (const_bstring b0, const_bstring b1);
|
||||
extern int bstrncmp (const_bstring b0, const_bstring b1, int n);
|
||||
extern int binstr (const_bstring s1, int pos, const_bstring s2);
|
||||
extern int binstrr (const_bstring s1, int pos, const_bstring s2);
|
||||
extern int binstrcaseless (const_bstring s1, int pos, const_bstring s2);
|
||||
extern int binstrrcaseless (const_bstring s1, int pos, const_bstring s2);
|
||||
extern int bstrchrp (const_bstring b, int c, int pos);
|
||||
extern int bstrrchrp (const_bstring b, int c, int pos);
|
||||
#define bstrchr(b,c) bstrchrp ((b), (c), 0)
|
||||
#define bstrrchr(b,c) bstrrchrp ((b), (c), blength(b)-1)
|
||||
extern int binchr (const_bstring b0, int pos, const_bstring b1);
|
||||
extern int binchrr (const_bstring b0, int pos, const_bstring b1);
|
||||
extern int bninchr (const_bstring b0, int pos, const_bstring b1);
|
||||
extern int bninchrr (const_bstring b0, int pos, const_bstring b1);
|
||||
extern int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos);
|
||||
extern int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos);
|
||||
|
||||
/* List of string container functions */
|
||||
/*
 * Container of bstrings. The bsplit(), bsplits() and bsplitstr() functions
 * return a pointer to one, and bjoin() consumes one.
 * NOTE(review): presumably qty is the number of entries in use and mlen the
 * number of slots allocated in entry -- confirm against bstrlib.c.
 */
struct bstrList {
|
||||
int qty, mlen;
|
||||
bstring * entry;
|
||||
};
|
||||
extern struct bstrList * bstrListCreate (void);
|
||||
extern int bstrListDestroy (struct bstrList * sl);
|
||||
extern int bstrListAlloc (struct bstrList * sl, int msz);
|
||||
extern int bstrListAllocMin (struct bstrList * sl, int msz);
|
||||
|
||||
/* String split and join functions */
|
||||
extern struct bstrList * bsplit (const_bstring str, unsigned char splitChar);
|
||||
extern struct bstrList * bsplits (const_bstring str, const_bstring splitStr);
|
||||
extern struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr);
|
||||
extern bstring bjoin (const struct bstrList * bl, const_bstring sep);
|
||||
extern int bsplitcb (const_bstring str, unsigned char splitChar, int pos,
|
||||
int (* cb) (void * parm, int ofs, int len), void * parm);
|
||||
extern int bsplitscb (const_bstring str, const_bstring splitStr, int pos,
|
||||
int (* cb) (void * parm, int ofs, int len), void * parm);
|
||||
extern int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos,
|
||||
int (* cb) (void * parm, int ofs, int len), void * parm);
|
||||
|
||||
/* Miscellaneous functions */
|
||||
extern int bpattern (bstring b, int len);
|
||||
extern int btoupper (bstring b);
|
||||
extern int btolower (bstring b);
|
||||
extern int bltrimws (bstring b);
|
||||
extern int brtrimws (bstring b);
|
||||
extern int btrimws (bstring b);
|
||||
|
||||
/* <*>printf format functions */
|
||||
#if !defined (BSTRLIB_NOVSNP)
|
||||
extern bstring bformat (const char * fmt, ...);
|
||||
extern int bformata (bstring b, const char * fmt, ...);
|
||||
extern int bassignformat (bstring b, const char * fmt, ...);
|
||||
extern int bvcformata (bstring b, int count, const char * fmt, va_list arglist);
|
||||
|
||||
/*
 * bvformata(ret, b, fmt, lastarg): format-append helper for use inside a
 * variadic function, where lastarg is the last named parameter (it is handed
 * to va_start).
 *
 * The macro repeatedly calls bvcformata() on b with a size estimate that
 * starts at 16:
 *   - a non-negative result ends the loop and stores BSTR_OK in ret;
 *   - a negative result whose magnitude does not exceed the size just tried
 *     is treated as a genuine failure and stores BSTR_ERR in ret;
 *   - any other negative result is negated, used as the next size estimate,
 *     and the call is retried with a freshly started va_list.
 *
 * b and fmt are each evaluated once; ret must be an assignable int lvalue.
 */
#define bvformata(ret, b, fmt, lastarg) { \
|
||||
bstring bstrtmp_b = (b); \
|
||||
const char * bstrtmp_fmt = (fmt); \
|
||||
int bstrtmp_r = BSTR_ERR, bstrtmp_sz = 16; \
|
||||
for (;;) { \
|
||||
va_list bstrtmp_arglist; \
|
||||
va_start (bstrtmp_arglist, lastarg); \
|
||||
bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \
|
||||
va_end (bstrtmp_arglist); \
|
||||
if (bstrtmp_r >= 0) { /* Everything went ok */ \
|
||||
bstrtmp_r = BSTR_OK; \
|
||||
break; \
|
||||
} else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? */ \
|
||||
bstrtmp_r = BSTR_ERR; \
|
||||
break; \
|
||||
} \
|
||||
bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \
|
||||
} \
|
||||
ret = bstrtmp_r; \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
typedef int (*bNgetc) (void *parm);
|
||||
typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, void *parm);
|
||||
|
||||
/* Input functions */
|
||||
extern bstring bgets (bNgetc getcPtr, void * parm, char terminator);
|
||||
extern bstring bread (bNread readPtr, void * parm);
|
||||
extern int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator);
|
||||
extern int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator);
|
||||
extern int breada (bstring b, bNread readPtr, void * parm);
|
||||
|
||||
/* Stream functions */
|
||||
extern struct bStream * bsopen (bNread readPtr, void * parm);
|
||||
extern void * bsclose (struct bStream * s);
|
||||
extern int bsbufflength (struct bStream * s, int sz);
|
||||
extern int bsreadln (bstring b, struct bStream * s, char terminator);
|
||||
extern int bsreadlns (bstring r, struct bStream * s, const_bstring term);
|
||||
extern int bsread (bstring b, struct bStream * s, int n);
|
||||
extern int bsreadlna (bstring b, struct bStream * s, char terminator);
|
||||
extern int bsreadlnsa (bstring r, struct bStream * s, const_bstring term);
|
||||
extern int bsreada (bstring b, struct bStream * s, int n);
|
||||
extern int bsunread (struct bStream * s, const_bstring b);
|
||||
extern int bspeek (bstring r, const struct bStream * s);
|
||||
extern int bssplitscb (struct bStream * s, const_bstring splitStr,
|
||||
int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
|
||||
extern int bssplitstrcb (struct bStream * s, const_bstring splitStr,
|
||||
int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
|
||||
extern int bseof (const struct bStream * s);
|
||||
|
||||
/*
 * The bstring header. `bstring` and `const_bstring` are pointers to this
 * struct; the accessor and write-protection macros in this header read and
 * write its fields directly.
 */
struct tagbstring {
|
||||
int mlen; /* bwriteprotect() sets this to -1; biswriteprotected() tests mlen <= 0 */
|
||||
int slen; /* string length as reported by blength(); a negative value is treated as invalid */
|
||||
unsigned char * data; /* the string's bytes; a NULL pointer is treated as invalid by bdata() */
|
||||
};
|
||||
|
||||
/* Accessor macros */
|
||||
/* Length (slen) of bstring b, or (int)(e) when b is NULL or its slen is negative. */
#define blengthe(b, e) (((b) == (void *)0 || (b)->slen < 0) ? (int)(e) : ((b)->slen))
|
||||
#define blength(b) (blengthe ((b), 0))
|
||||
/*
 * char pointer to offset o within b's data, or (char *)(e) when b or b->data
 * is NULL. The offset itself is not range checked.
 */
#define bdataofse(b, o, e) (((b) == (void *)0 || (b)->data == (void*)0) ? (char *)(e) : ((char *)(b)->data) + (o))
|
||||
#define bdataofs(b, o) (bdataofse ((b), (o), (void *)0))
|
||||
#define bdatae(b, e) (bdataofse (b, 0, e))
|
||||
#define bdata(b) (bdataofs (b, 0))
|
||||
/*
 * Character at index p of b, or e when p is outside [0, blength(b)). The
 * unsigned comparison rejects negative p as well as p >= length, and because
 * blength() yields 0 for a NULL b, a NULL b always produces e.
 */
#define bchare(b, p, e) ((((unsigned)(p)) < (unsigned)blength(b)) ? ((b)->data[(p)]) : (e))
|
||||
#define bchar(b, p) bchare ((b), (p), '\0')
|
||||
|
||||
/* Static constant string initialization macro */
|
||||
#define bsStaticMlen(q,m) {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")}
|
||||
#if defined(_MSC_VER)
|
||||
/* There are many versions of MSVC which emit __LINE__ as a non-constant. */
|
||||
# define bsStatic(q) bsStaticMlen(q,-32)
|
||||
#endif
|
||||
#ifndef bsStatic
|
||||
# define bsStatic(q) bsStaticMlen(q,-__LINE__)
|
||||
#endif
|
||||
|
||||
/* Static constant block parameter pair */
|
||||
#define bsStaticBlkParms(q) ((void *)("" q "")), ((int) sizeof(q)-1)
|
||||
|
||||
/* Reference building macros */
|
||||
#define cstr2tbstr btfromcstr
|
||||
#define btfromcstr(t,s) { \
|
||||
(t).data = (unsigned char *) (s); \
|
||||
(t).slen = ((t).data) ? ((int) (strlen) ((char *)(t).data)) : 0; \
|
||||
(t).mlen = -1; \
|
||||
}
|
||||
#define blk2tbstr(t,s,l) { \
|
||||
(t).data = (unsigned char *) (s); \
|
||||
(t).slen = l; \
|
||||
(t).mlen = -1; \
|
||||
}
|
||||
#define btfromblk(t,s,l) blk2tbstr(t,s,l)
|
||||
#define bmid2tbstr(t,b,p,l) { \
|
||||
const_bstring bstrtmp_s = (b); \
|
||||
if (bstrtmp_s && bstrtmp_s->data && bstrtmp_s->slen >= 0) { \
|
||||
int bstrtmp_left = (p); \
|
||||
int bstrtmp_len = (l); \
|
||||
if (bstrtmp_left < 0) { \
|
||||
bstrtmp_len += bstrtmp_left; \
|
||||
bstrtmp_left = 0; \
|
||||
} \
|
||||
if (bstrtmp_len > bstrtmp_s->slen - bstrtmp_left) \
|
||||
bstrtmp_len = bstrtmp_s->slen - bstrtmp_left; \
|
||||
if (bstrtmp_len <= 0) { \
|
||||
(t).data = (unsigned char *)""; \
|
||||
(t).slen = 0; \
|
||||
} else { \
|
||||
(t).data = bstrtmp_s->data + bstrtmp_left; \
|
||||
(t).slen = bstrtmp_len; \
|
||||
} \
|
||||
} else { \
|
||||
(t).data = (unsigned char *)""; \
|
||||
(t).slen = 0; \
|
||||
} \
|
||||
(t).mlen = -__LINE__; \
|
||||
}
|
||||
#define btfromblkltrimws(t,s,l) { \
|
||||
int bstrtmp_idx = 0, bstrtmp_len = (l); \
|
||||
unsigned char * bstrtmp_s = (s); \
|
||||
if (bstrtmp_s && bstrtmp_len >= 0) { \
|
||||
for (; bstrtmp_idx < bstrtmp_len; bstrtmp_idx++) { \
|
||||
if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \
|
||||
} \
|
||||
} \
|
||||
(t).data = bstrtmp_s + bstrtmp_idx; \
|
||||
(t).slen = bstrtmp_len - bstrtmp_idx; \
|
||||
(t).mlen = -__LINE__; \
|
||||
}
|
||||
#define btfromblkrtrimws(t,s,l) { \
|
||||
int bstrtmp_len = (l) - 1; \
|
||||
unsigned char * bstrtmp_s = (s); \
|
||||
if (bstrtmp_s && bstrtmp_len >= 0) { \
|
||||
for (; bstrtmp_len >= 0; bstrtmp_len--) { \
|
||||
if (!isspace (bstrtmp_s[bstrtmp_len])) break; \
|
||||
} \
|
||||
} \
|
||||
(t).data = bstrtmp_s; \
|
||||
(t).slen = bstrtmp_len + 1; \
|
||||
(t).mlen = -__LINE__; \
|
||||
}
|
||||
#define btfromblktrimws(t,s,l) { \
|
||||
int bstrtmp_idx = 0, bstrtmp_len = (l) - 1; \
|
||||
unsigned char * bstrtmp_s = (s); \
|
||||
if (bstrtmp_s && bstrtmp_len >= 0) { \
|
||||
for (; bstrtmp_idx <= bstrtmp_len; bstrtmp_idx++) { \
|
||||
if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \
|
||||
} \
|
||||
for (; bstrtmp_len >= bstrtmp_idx; bstrtmp_len--) { \
|
||||
if (!isspace (bstrtmp_s[bstrtmp_len])) break; \
|
||||
} \
|
||||
} \
|
||||
(t).data = bstrtmp_s + bstrtmp_idx; \
|
||||
(t).slen = bstrtmp_len + 1 - bstrtmp_idx; \
|
||||
(t).mlen = -__LINE__; \
|
||||
}
|
||||
|
||||
/* Write protection macros */
|
||||
/* Mark the tagbstring t (a struct lvalue) as write protected by setting mlen to -1, if mlen is currently >= 0. */
#define bwriteprotect(t) { if ((t).mlen >= 0) (t).mlen = -1; }
|
||||
/*
 * Undo bwriteprotect(): only when mlen is exactly -1, set mlen to slen, or
 * to 1 when slen is 0 so that the result is never 0. Other negative mlen
 * values are left alone.
 */
#define bwriteallow(t) { if ((t).mlen == -1) (t).mlen = (t).slen + ((t).slen == 0); }
|
||||
/* Non-zero when the tagbstring t (a struct lvalue) has mlen <= 0, i.e. is not writable. */
#define biswriteprotected(t) ((t).mlen <= 0)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
3202
src/cbstring/bstrlib.txt
Normal file
3202
src/cbstring/bstrlib.txt
Normal file
File diff suppressed because it is too large
Load Diff
29
src/cbstring/license.txt
Normal file
29
src/cbstring/license.txt
Normal file
@ -0,0 +1,29 @@
|
||||
Copyright (c) 2002-2008 Paul Hsieh
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
Neither the name of bstrlib nor the names of its contributors may be used
|
||||
to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
172
src/cbstring/porting.txt
Normal file
172
src/cbstring/porting.txt
Normal file
@ -0,0 +1,172 @@
|
||||
Better String library Porting Guide
|
||||
-----------------------------------
|
||||
|
||||
by Paul Hsieh
|
||||
|
||||
The bstring library is an attempt to provide improved string processing
|
||||
functionality to the C and C++ language. At the heart of the bstring library
|
||||
is the management of "bstring"s which are a significant improvement over '\0'
|
||||
terminated char buffers. See the accompanying documentation file bstrlib.txt
|
||||
for more information.
|
||||
|
||||
===============================================================================
|
||||
|
||||
Identifying the Compiler
|
||||
------------------------
|
||||
|
||||
Bstrlib has been tested on the following compilers:
|
||||
|
||||
Microsoft Visual C++
|
||||
Watcom C/C++ (32 bit flat)
|
||||
Intel's C/C++ compiler (on Windows)
|
||||
The GNU C/C++ compiler (on Windows/Linux on x86 and PPC64)
|
||||
Borland C++
|
||||
Turbo C
|
||||
|
||||
There are slight differences in these compilers which require slight
|
||||
differences in the implementation of Bstrlib. These are accommodated in the
|
||||
same sources using #ifdef/#if defined() on compiler specific macros. To
|
||||
port Bstrlib to a new compiler not listed above, it is recommended that the
|
||||
same strategy be followed. If you are unaware of the compiler specific
|
||||
identifying preprocessor macro for your compiler you might find it here:
|
||||
|
||||
http://predef.sourceforge.net/precomp.html
|
||||
|
||||
Note that Intel C/C++ on Windows sets the Microsoft identifier: _MSC_VER.
|
||||
|
||||
16-bit vs. 32-bit vs. 64-bit Systems
|
||||
------------------------------------
|
||||
|
||||
Bstrlib has been architected to deal with strings of length between 0 and
|
||||
INT_MAX (inclusive). Since the values of int are never higher than size_t
|
||||
there will be no issue here. Note that on most 64-bit systems int is 32-bit.
|
||||
|
||||
Dependency on The C-Library
|
||||
---------------------------
|
||||
|
||||
Bstrlib uses the functions memcpy, memmove, malloc, realloc, free and
|
||||
vsnprintf. Many free standing C compiler implementations that have a mode in
|
||||
which the C library is not available will typically not include these
|
||||
functions which will make porting Bstrlib to it onerous. Bstrlib is not
|
||||
designed for such bare bones compiler environments. This usually includes
|
||||
compilers that target ROM environments.
|
||||
|
||||
Porting Issues
|
||||
--------------
|
||||
|
||||
Bstrlib has been written completely in ANSI/ISO C and ISO C++, however, there
|
||||
are still a few porting issues. These are described below.
|
||||
|
||||
1. The vsnprintf () function.
|
||||
|
||||
Unfortunately, the earlier ANSI/ISO C standards did not include this function.
|
||||
If the compiler of interest does not support this function then the
|
||||
BSTRLIB_NOVSNP should be defined via something like:
|
||||
|
||||
#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP)
|
||||
# if defined (__TURBOC__) || defined (__COMPILERVENDORSPECIFICMACRO__)
|
||||
# define BSTRLIB_NOVSNP
|
||||
# endif
|
||||
#endif
|
||||
|
||||
which appears at the top of bstrlib.h. Note that the bformat(a) functions
|
||||
will not be declared or implemented if the BSTRLIB_NOVSNP macro is set. If
|
||||
the compiler has renamed vsnprintf() to some other named function, then
|
||||
search for the definition of the exvsnprintf macro in bstrlib.c file and be
|
||||
sure it's defined appropriately:
|
||||
|
||||
#if defined (__COMPILERVENDORSPECIFICMACRO__)
|
||||
# define exvsnprintf(r,b,n,f,a) {r=__compiler_specific_vsnprintf(b,n,f,a);}
|
||||
#else
|
||||
# define exvsnprintf(r,b,n,f,a) {r=vsnprintf(b,n,f,a);}
|
||||
#endif
|
||||
|
||||
Take notice of the return value being captured in the variable r. It is
|
||||
assumed that r exceeds n if and only if the underlying vsnprintf function has
|
||||
determined what the true maximal output length would be for output if the
|
||||
buffer were large enough to hold it. Non-modern implementations must output a
|
||||
lesser number (the macro can and should be modified to ensure this).
|
||||
|
||||
2. Weak C++ compiler.
|
||||
|
||||
C++ is a much more complicated language to implement than C. This has led
|
||||
to varying quality of compiler implementations. The weaknesses isolated in
|
||||
the initial ports are inclusion of the Standard Template Library,
|
||||
std::iostream and exception handling. By default it is assumed that the C++
|
||||
compiler supports all of these things correctly. If your compiler does not
|
||||
support one or more of these define the corresponding macro:
|
||||
|
||||
BSTRLIB_CANNOT_USE_STL
|
||||
BSTRLIB_CANNOT_USE_IOSTREAM
|
||||
BSTRLIB_DOESNT_THROW_EXCEPTIONS
|
||||
|
||||
The compiler specific detected macro should be defined at the top of
|
||||
bstrwrap.h in the Configuration defines section. Note that these disabling
|
||||
macros can be overridden with the associated enabling macro if a subsequent
|
||||
version of the compiler gains support. (For example, it's possible to rig
|
||||
up STLport to provide STL support for WATCOM C/C++, so -DBSTRLIB_CAN_USE_STL
|
||||
can be passed in as a compiler option.)
|
||||
|
||||
3. The bsafe module, and reserved words.
|
||||
|
||||
The bsafe module is in gross violation of the ANSI/ISO C standard in the
|
||||
sense that it redefines what could be implemented as reserved words on a
|
||||
given compiler. The typical problem is that a compiler may inline some of the
|
||||
functions and thus not be properly overridden by the definitions in the bsafe
|
||||
module. It is also possible that a compiler may prohibit the redefinitions in
|
||||
the bsafe module. Compiler specific action will be required to deal with
|
||||
these situations.
|
||||
|
||||
Platform Specific Files
|
||||
-----------------------
|
||||
|
||||
The makefiles for the examples are basically set up for particular
|
||||
environments for each platform. In general these makefiles are not portable
|
||||
and should be constructed as necessary from scratch for each platform.
|
||||
|
||||
Testing a port
|
||||
--------------
|
||||
|
||||
To test that a port compiles correctly do the following:
|
||||
|
||||
1. Build a sample project that includes the bstrlib, bstraux, bstrwrap, and
|
||||
bsafe modules.
|
||||
2. Compile bstest against the bstrlib module.
|
||||
3. Run bstest and ensure that 0 errors are reported.
|
||||
4. Compile test against the bstrlib and bstrwrap modules.
|
||||
5. Run test and ensure that 0 errors are reported.
|
||||
6. Compile each of the examples (except for the "re" example, which may be
|
||||
complicated and is not a real test of bstrlib and except for the mfcbench
|
||||
example which is Windows specific.)
|
||||
7. Run each of the examples.
|
||||
|
||||
The builds must have 0 errors, and should have the absolute minimum number of
|
||||
warnings (in most cases can be reduced to 0.) The result of execution should
|
||||
be essentially identical on each platform.
|
||||
|
||||
Performance
|
||||
-----------
|
||||
|
||||
Different CPU and compilers have different capabilities in terms of
|
||||
performance. It is possible for Bstrlib to assume performance
|
||||
characteristics that a platform doesn't have (since it was primarily
|
||||
developed on just one platform). The goal of Bstrlib is to provide very good
|
||||
performance on all platforms regardless of this but without resorting to
|
||||
extreme measures (such as using assembly language, or non-portable intrinsics
|
||||
or library extensions.)
|
||||
|
||||
There are two performance benchmarks that can be found in the example/
|
||||
directory. They are: cbench.c and cppbench.cpp. These are variations and
|
||||
expansions of a benchmark for another string library. They don't cover all
|
||||
string functionality, but do include the most basic functions which will be
|
||||
common in most string manipulation kernels.
|
||||
|
||||
...............................................................................
|
||||
|
||||
Feedback
|
||||
--------
|
||||
|
||||
In all cases, you may email issues found to the primary author of Bstrlib at
|
||||
the email address: websnarf@users.sourceforge.net
|
||||
|
||||
===============================================================================
|
221
src/cbstring/security.txt
Normal file
221
src/cbstring/security.txt
Normal file
@ -0,0 +1,221 @@
|
||||
Better String library Security Statement
|
||||
----------------------------------------
|
||||
|
||||
by Paul Hsieh
|
||||
|
||||
===============================================================================
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The Better String library (hereafter referred to as Bstrlib) is an attempt to
|
||||
provide improved string processing functionality to the C and C++ languages.
|
||||
At the heart of the Bstrlib is the management of "bstring"s which are a
|
||||
significant improvement over '\0' terminated char buffers. See the
|
||||
accompanying documentation file bstrlib.txt for more information.
|
||||
|
||||
DISCLAIMER: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
|
||||
NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Like any software, there is always a possibility of failure due to a flawed
|
||||
implementation. Nevertheless a good faith effort has been made to minimize
|
||||
such flaws in Bstrlib. Also, use of Bstrlib by itself will not make an
|
||||
application secure or free from implementation failures. However, it is the
|
||||
author's conviction that use of Bstrlib can greatly facilitate the creation
|
||||
of software meeting the highest possible standards of security.
|
||||
|
||||
Part of the reason why this document has been created, is for the purpose of
|
||||
security auditing, or the creation of further "Statements on Security" for
|
||||
software that is created that uses Bstrlib. An auditor may check the claims
|
||||
below against Bstrlib, and use this as a basis for analysis of software which
|
||||
uses Bstrlib.
|
||||
|
||||
===============================================================================
|
||||
|
||||
Statement on Security
|
||||
---------------------
|
||||
|
||||
This is a document intended to give consumers of the Better String Library
|
||||
who are interested in security an idea of where the Better String Library
|
||||
stands on various security issues. Any deviation observed in the actual
|
||||
library itself from the descriptions below should be considered an
|
||||
implementation error, not a design flaw.
|
||||
|
||||
This statement is not an analytical proof of correctness or an outline of one
|
||||
but rather an assertion similar to a scientific claim or hypothesis. By use,
|
||||
testing and open independent examination (otherwise known as scientific
|
||||
falsifiability), the credibility of the claims made below can rise to the
|
||||
level of an established theory.
|
||||
|
||||
Common security issues:
|
||||
.......................
|
||||
|
||||
1. Buffer Overflows
|
||||
|
||||
The Bstrlib API allows the programmer a way to deal with strings without
|
||||
having to deal with the buffers containing them. Ordinary usage of the
|
||||
Bstrlib API itself makes buffer overflows impossible.
|
||||
|
||||
Furthermore, the Bstrlib API has a superset of basic string functionality as
|
||||
compared to the C library's char * functions, C++'s std::string class and
|
||||
Microsoft's MFC based CString class. It also has abstracted mechanisms for
|
||||
dealing with IO. This is important as it gives developers a way of migrating
|
||||
all their code from a functionality point of view.
|
||||
|
||||
2. Memory size overflow/wrap around attack
|
||||
|
||||
Bstrlib is, by design, impervious to memory size overflow attacks. The
|
||||
reason it is resilient to length overflows is that bstring lengths are
|
||||
bounded above by INT_MAX, instead of ~(size_t)0. So length addition
|
||||
overflows cause a wrap around of the integer value making them negative
|
||||
causing balloc() to fail before an erroneous operation can occur. Attempted
|
||||
conversions of char * strings which may have lengths greater than INT_MAX are
|
||||
detected and the conversion is aborted.
|
||||
|
||||
It is unknown if this property holds on machines that don't represent
|
||||
integers as 2s complement. It is recommended that Bstrlib be carefully
|
||||
audited by anyone using a system which is not 2s complement based.
|
||||
|
||||
3. Constant string protection
|
||||
|
||||
Bstrlib implements runtime enforced constant and read-only string semantics.
|
||||
I.e., bstrings which are declared as constant via the bsStatic() macro cannot
|
||||
be modified or deallocated directly through the Bstrlib API, and this cannot
|
||||
be subverted by casting or other type coercion. This is independent of the
|
||||
use of the const_bstring data type.
|
||||
|
||||
The Bstrlib C API uses the type const_bstring to specify bstring parameters
|
||||
whose contents do not change. Although the C language cannot enforce this,
|
||||
this is nevertheless guaranteed by the implementation of the Bstrlib library
|
||||
of C functions. The C++ API enforces the const attribute on CBString types
|
||||
correctly.
|
||||
|
||||
4. Aliased bstring support
|
||||
|
||||
Bstrlib detects and supports aliased parameter management throughout the API.
|
||||
The kind of aliasing that is allowed is the one where pointers of the same
|
||||
basic type may be pointing to overlapping objects (this is the assumption the
|
||||
ANSI C99 specification makes.) Each function behaves as if all read-only
|
||||
parameters were copied to temporaries which are used in their stead before
|
||||
the function is enacted (it rarely actually does this). No function in the
|
||||
Bstrlib uses the "restrict" parameter attribute from the ANSI C99
|
||||
specification.
|
||||
|
||||
5. Information leaking
|
||||
|
||||
In bstraux.h, using the semantically equivalent macros bSecureDestroy() and
|
||||
bSecureWriteProtect() in place of bdestroy() and bwriteprotect() respectively
|
||||
will ensure that stale data does not linger in the heap's free space after
|
||||
strings have been released back to memory. Created bstrings or CBStrings
|
||||
are not linked to anything external to themselves, and thus cannot expose
|
||||
deterministic data leaking. If a bstring is resized, the preimage may exist
|
||||
as a copy that is released to the heap. Thus for sensitive data, the bstring
|
||||
should be sufficiently presized before manipulated so that it is not resized.
|
||||
bSecureInput() has been supplied in bstraux.c, which can be used to obtain
|
||||
input securely without any risk of leaving any part of the input image in the
|
||||
heap except for the allocated bstring that is returned.
|
||||
|
||||
6. Memory leaking
|
||||
|
||||
Bstrlib can be built using memdbg.h enabled via the BSTRLIB_MEMORY_DEBUG
|
||||
macro. User generated definitions for malloc, realloc and free can then be
|
||||
supplied which can implement special strategies for memory corruption
|
||||
detection or memory leaking. Otherwise, bstrlib does not do anything out of
|
||||
the ordinary to attempt to deal with the standard problem of memory leaking
|
||||
(i.e., losing references to allocated memory) when programming in the C and
|
||||
C++ languages. However, it does not compound the problem any more than exists
|
||||
either, as it doesn't have any intrinsic inescapable leaks in it. Bstrlib
|
||||
does not preclude the use of automatic garbage collection mechanisms such as
|
||||
the Boehm garbage collector.
|
||||
|
||||
7. Encryption
|
||||
|
||||
Bstrlib does not present any built-in encryption mechanism. However, it
|
||||
supports full binary contents in its data buffers, so any standard block
|
||||
based encryption mechanism can make direct use of bstrings/CBStrings for
|
||||
buffer management.
|
||||
|
||||
8. Double freeing
|
||||
|
||||
Freeing a pointer that is already free is an extremely rare, but nevertheless
|
||||
a potentially ruthlessly corrupting operation (it's possible to cause Win 98 to
|
||||
reboot, by calling free multiple times on already freed data using the WATCOM
|
||||
CRT.) Bstrlib invalidates the bstring header data before freeing, so that in
|
||||
many cases a double free will be detected and an error will be reported
|
||||
(though this behaviour is not guaranteed and should not be relied on).
|
||||
|
||||
Using bstrFree pervasively (instead of bdestroy) can lead to somewhat
|
||||
improved invalid free avoidance (it is completely safe whenever bstring
|
||||
instances are only stored in unique variables). For example:
|
||||
|
||||
struct tagbstring hw = bsStatic ("Hello, world");
|
||||
bstring cpHw = bstrcpy (&hw);
|
||||
|
||||
#ifdef NOT_QUITE_AS_SAFE
|
||||
bdestroy (cpHw); /* Never fail */
|
||||
bdestroy (cpHw); /* Error sometimes detected at runtime */
|
||||
bdestroy (&hw); /* Error detected at run time */
|
||||
#else
|
||||
bstrFree (cpHw); /* Never fail */
|
||||
bstrFree (cpHw); /* Will do nothing */
|
||||
bstrFree (&hw); /* Will lead to a compile time error */
|
||||
#endif
|
||||
|
||||
9. Resource based denial of service
|
||||
|
||||
bSecureInput() has been supplied in bstraux.c. It has an optional upper limit
|
||||
for input length. But unlike fgets(), it is also easily determined if the
|
||||
buffer has been truncated early. In this way, a program can set an upper limit
|
||||
on input sizes while still allowing for implementing context specific
|
||||
truncation semantics (i.e., does the program consume but dump the extra
|
||||
input, or does it consume it in later inputs?)
|
||||
|
||||
10. Mixing char *'s and bstrings
|
||||
|
||||
The bstring and char * representations are not identical. So there is a risk
|
||||
when converting back and forth that data may be lost. Essentially bstrings can
|
||||
contain '\0' as a valid non-terminating character, while char * strings
|
||||
cannot and in fact must use the character as a terminator. The risk of data
|
||||
loss is very low, since:
|
||||
|
||||
A) the simple method of only using bstrings in a char * semantically
|
||||
compatible way is both easy to achieve and pervasively supported.
|
||||
B) obtaining '\0' content in a string is either deliberate or indicative
|
||||
of another, likely more serious problem in the code.
|
||||
C) the library comes with various functions which deal with this issue
|
||||
(namely: bfromcstr(), bstr2cstr (), and bSetCstrChar ())
|
||||
|
||||
Marginal security issues:
|
||||
.........................
|
||||
|
||||
11. 8-bit versus 9-bit portability
|
||||
|
||||
Bstrlib uses CHAR_BIT and other limits.h constants to the maximum extent
|
||||
possible to avoid portability problems. However, Bstrlib has not been tested
|
||||
on any system that does not represent char as 8-bits. So whether or not it
|
||||
works on 9-bit systems is an open question. It is recommended that Bstrlib be
|
||||
carefully audited by anyone using a system in which CHAR_BIT is not 8.
|
||||
|
||||
12. EBCDIC/ASCII/UTF-8 data representation attacks.
|
||||
|
||||
Bstrlib uses ctype.h functions to ensure that it remains portable to non-
|
||||
ASCII systems. It also checks range to make sure it is well defined even for
|
||||
data that ANSI does not define for the ctype functions.
|
||||
|
||||
Obscure issues:
|
||||
...............
|
||||
|
||||
13. Data attributes
|
||||
|
||||
There is no support for a Perl-like "taint" attribute; however, an example of
|
||||
how to do this using C++'s type system is provided.
|
||||
|
1639
src/decode.cpp
Normal file
1639
src/decode.cpp
Normal file
File diff suppressed because it is too large
Load Diff
163
src/internal_includes/ControlFlowGraph.h
Normal file
163
src/internal_includes/ControlFlowGraph.h
Normal file
@ -0,0 +1,163 @@
|
||||
#pragma once
|
||||
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <tr1/memory>
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
struct Instruction;
|
||||
class Operand;
|
||||
|
||||
namespace HLSLcc
|
||||
{
|
||||
#ifdef __APPLE__
|
||||
// Herp derp Apple is stuck in 2005
|
||||
using namespace std::tr1;
|
||||
#else
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
namespace ControlFlow
|
||||
{
|
||||
class BasicBlock;
|
||||
|
||||
class ControlFlowGraph
|
||||
{
|
||||
friend class BasicBlock;
|
||||
public:
|
||||
ControlFlowGraph()
|
||||
: m_BlockMap()
|
||||
, m_BlockStorage()
|
||||
{}
|
||||
|
||||
typedef std::vector<shared_ptr<BasicBlock> > BasicBlockStorage;
|
||||
|
||||
const BasicBlock &Build(const Instruction *firstInstruction);
|
||||
|
||||
// Only works for instructions that start the basic block
|
||||
const BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction) const;
|
||||
|
||||
// non-const version for BasicBlock
|
||||
BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction);
|
||||
|
||||
const BasicBlockStorage &AllBlocks() const { return m_BlockStorage; }
|
||||
private:
|
||||
|
||||
// Map for storing the created basic blocks. Map key is the pointer to the first instruction in the block
|
||||
typedef std::map<const Instruction *, BasicBlock *> BasicBlockMap;
|
||||
|
||||
BasicBlockMap m_BlockMap;
|
||||
|
||||
// auto_ptr -type storage for multiple BasicBlocks. BlockMap above only has pointers into these
|
||||
BasicBlockStorage m_BlockStorage;
|
||||
};
|
||||
|
||||
|
||||
// A single basic block of the control flow graph: a run of instructions from First() to Last(),
// together with the register-usage and reaching-definition sets computed for it.
// Instances can only be created by ControlFlowGraph (private constructor + friendship).
class BasicBlock
{
    friend class ControlFlowGraph;

public:
    // A set of register indices, one per each vec4 component per register
    typedef std::set<uint32_t> RegisterSet;

    // The connections (either incoming or outgoing) from this block.
    // The instruction is the same one as the key in ControlFlowGraph to that basic block
    typedef std::set<const Instruction *> ConnectionSet;

    // Identifies one definition site: the instruction and the operand within it.
    struct Definition
    {
        Definition(const Instruction *i = NULL, const Operand *o = NULL)
            : m_Instruction(i)
            , m_Operand(o)
        {}

        Definition(const Definition &a)
            : m_Instruction(a.m_Instruction)
            , m_Operand(a.m_Operand)
        {}

        // Two definitions are equal when both the instruction and the operand match.
        bool operator==(const Definition &a) const
        {
            return a.m_Instruction == m_Instruction && a.m_Operand == m_Operand;
        }

        // Exact negation of operator==. The previous implementation returned false whenever the
        // instructions matched (even with different operands) and otherwise looked only at the
        // operand, so a != b and !(a == b) could disagree.
        bool operator!=(const Definition &a) const
        {
            return !(*this == a);
        }

        // Strict weak ordering (instruction first, then operand) so Definition can be a std::set key.
        bool operator<(const Definition &a) const
        {
            if (m_Instruction != a.m_Instruction)
                return m_Instruction < a.m_Instruction;
            return m_Operand < a.m_Operand;
        }

        const Instruction *m_Instruction;
        const Operand *m_Operand;
    };

    typedef std::set<Definition> ReachableDefinitionsPerVariable; // A set of possibly visible definitions for one component of one vec4 variable
    typedef std::map<uint32_t, ReachableDefinitionsPerVariable> ReachableVariables; // A ReachableDefinitionsPerVariable for each variable*component.

    const Instruction *First() const { return m_First; }
    const Instruction *Last() const { return m_Last; }

    const RegisterSet &UEVar() const { return m_UEVar; }
    const RegisterSet &VarKill() const { return m_VarKill; }

    const ConnectionSet &Preceding() const { return m_Preceding; }
    const ConnectionSet &Succeeding() const { return m_Succeeding; }

    const ReachableVariables &DEDef() const { return m_DEDef; }
    const ReachableVariables &Reachable() const { return m_Reachable; }

    // Helper function: Do union of 2 ReachableVariables, store result in a.
    static void RVarUnion(ReachableVariables &a, const ReachableVariables &b);

private:
    // Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build()
    BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead);

    // Walk through the instructions and build UEVar and VarKill sets, create succeeding nodes if they don't exist already.
    void Build();

    // Rebuild m_Reachable from preceding blocks and this one. Returns true if current value changed.
    bool RebuildReachable();

    BasicBlock * AddChildBasicBlock(const Instruction *psFirst);

private:
    ControlFlowGraph &m_Graph; // The graph object containing this block

    const Instruction *m_First; // The first instruction in the basic block
    const Instruction *m_Last;  // The last instruction in the basic block. Either OPCODE_RET or a branch/jump/loop instruction

    RegisterSet m_UEVar;   // Upwards-exposed variables (temps that need definition from upstream and are used in this basic block)
    RegisterSet m_VarKill; // Set of variables that are defined in this block.

    ConnectionSet m_Preceding;  // Set of blocks that immediately precede this block in the CFG
    ConnectionSet m_Succeeding; // Set of blocks that follow this block in the CFG

    ReachableVariables m_DEDef; // Downward-exposed definitions from this basic block. Always only one item per set.

    ReachableVariables m_Reachable; // The set of variable definitions that are visible at the end of this block.
};
|
||||
|
||||
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
|
31
src/internal_includes/ControlFlowGraphUtils.h
Normal file
31
src/internal_includes/ControlFlowGraphUtils.h
Normal file
@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
|
||||
struct Instruction;
|
||||
|
||||
namespace HLSLcc
|
||||
{
|
||||
namespace ControlFlow
|
||||
{
|
||||
class Utils
|
||||
{
|
||||
public:
|
||||
// For a given flow-control instruction, find the corresponding jump location:
|
||||
// If the input is OPCODE_IF, then find the next same-level ELSE or ENDIF +1
|
||||
// For ELSE, find same level ENDIF + 1
|
||||
// For BREAK/BREAKC, find next ENDLOOP or ENDSWITCH + 1
|
||||
// For SWITCH, find next same-level CASE/DEFAULT (skip multiple consecutive case/default labels) or ENDSWITCH + 1
|
||||
// For ENDLOOP, find previous same-level LOOP + 1
|
||||
// For CASE/DEFAULT, find next same-level CASE/DEFAULT or ENDSWITCH + 1, skip multiple consecutive case/default labels
|
||||
// For CONTINUE/C the previous LOOP + 1
|
||||
// Note that LOOP/ENDSWITCH itself is nothing but a label but it still starts a new basic block.
|
||||
// Note that CASE labels fall through.
|
||||
// Always returns the beginning of the next block, so skip multiple CASE/DEFAULT labels etc.
|
||||
// If sawEndSwitch != null, will bet set to true if the label skipping saw past ENDSWITCH
|
||||
// If needConnectToParent != null, will be set to true if sawEndSwitch == true and there are one or more case labels directly before it.
|
||||
static const Instruction * GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch = 0, bool *needConnectToParent = 0);
|
||||
|
||||
static const Instruction *GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch = 0);
|
||||
|
||||
};
|
||||
}
|
||||
}
|
15
src/internal_includes/DataTypeAnalysis.h
Normal file
15
src/internal_includes/DataTypeAnalysis.h
Normal file
@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include "include/ShaderInfo.h"
|
||||
#include <vector>
|
||||
|
||||
class HLSLCrossCompilerContext;
|
||||
struct Instruction;
|
||||
|
||||
namespace HLSLcc
|
||||
{
|
||||
namespace DataTypeAnalysis
|
||||
{
|
||||
void SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector<Instruction> &instructions, uint32_t ui32TempCount, std::vector<SHADER_VARIABLE_TYPE> &results);
|
||||
};
|
||||
};
|
101
src/internal_includes/Declaration.h
Normal file
101
src/internal_includes/Declaration.h
Normal file
@ -0,0 +1,101 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include "internal_includes/tokens.h"
|
||||
#include "internal_includes/Operand.h"
|
||||
|
||||
// Four raw 32-bit words; the element type of Declaration::asImmediateConstBuffer.
struct ICBVec4_TAG
{
    uint32_t a;
    uint32_t b;
    uint32_t c;
    uint32_t d;
};
typedef ICBVec4_TAG ICBVec4;

// Access flag bits; READ and WRITE may be OR'ed together.
#define ACCESS_FLAG_READ 0x1
#define ACCESS_FLAG_WRITE 0x2
|
||||
|
||||
struct Declaration
|
||||
{
|
||||
Declaration()
|
||||
:
|
||||
eOpcode(OPCODE_INVALID),
|
||||
ui32NumOperands(0),
|
||||
ui32BufferStride(0)
|
||||
{}
|
||||
|
||||
OPCODE_TYPE eOpcode;
|
||||
|
||||
uint32_t ui32NumOperands;
|
||||
|
||||
Operand asOperands[2];
|
||||
|
||||
std::vector<ICBVec4> asImmediateConstBuffer;
|
||||
//The declaration can set one of these
|
||||
//values depending on the opcode.
|
||||
union {
|
||||
uint32_t ui32GlobalFlags;
|
||||
uint32_t ui32NumTemps;
|
||||
RESOURCE_DIMENSION eResourceDimension;
|
||||
INTERPOLATION_MODE eInterpolation;
|
||||
PRIMITIVE_TOPOLOGY eOutputPrimitiveTopology;
|
||||
PRIMITIVE eInputPrimitive;
|
||||
uint32_t ui32MaxOutputVertexCount;
|
||||
TESSELLATOR_DOMAIN eTessDomain;
|
||||
TESSELLATOR_PARTITIONING eTessPartitioning;
|
||||
TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim;
|
||||
uint32_t aui32WorkGroupSize[3];
|
||||
uint32_t ui32HullPhaseInstanceCount;
|
||||
float fMaxTessFactor;
|
||||
uint32_t ui32IndexRange;
|
||||
uint32_t ui32GSInstanceCount;
|
||||
|
||||
struct Interface_TAG
|
||||
{
|
||||
uint32_t ui32InterfaceID;
|
||||
uint32_t ui32NumFuncTables;
|
||||
uint32_t ui32ArraySize;
|
||||
} interface;
|
||||
} value;
|
||||
|
||||
uint32_t ui32BufferStride;
|
||||
|
||||
struct UAV_TAG
|
||||
{
|
||||
UAV_TAG() :
|
||||
ui32GloballyCoherentAccess(0),
|
||||
bCounter(0),
|
||||
Type(RETURN_TYPE_UNORM),
|
||||
ui32NumComponents(0),
|
||||
ui32AccessFlags(0)
|
||||
{
|
||||
}
|
||||
uint32_t ui32GloballyCoherentAccess;
|
||||
uint8_t bCounter;
|
||||
RESOURCE_RETURN_TYPE Type;
|
||||
uint32_t ui32NumComponents;
|
||||
uint32_t ui32AccessFlags;
|
||||
} sUAV;
|
||||
|
||||
struct TGSM_TAG
|
||||
{
|
||||
uint32_t ui32Stride;
|
||||
uint32_t ui32Count;
|
||||
} sTGSM;
|
||||
|
||||
struct IndexableTemp_TAG
|
||||
{
|
||||
uint32_t ui32RegIndex;
|
||||
uint32_t ui32RegCount;
|
||||
uint32_t ui32RegComponentSize;
|
||||
} sIdxTemp;
|
||||
|
||||
uint32_t ui32TableLength;
|
||||
|
||||
uint32_t ui32IsShadowTex;
|
||||
|
||||
// Set indexed by sampler register number.
|
||||
std::set<uint32_t> samplersUsed;
|
||||
};
|
||||
|
50
src/internal_includes/HLSLCrossCompilerContext.h
Normal file
50
src/internal_includes/HLSLCrossCompilerContext.h
Normal file
@ -0,0 +1,50 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include "bstrlib.h"
|
||||
|
||||
class Shader;
|
||||
class GLSLCrossDependencyData;
|
||||
class ShaderPhase;
|
||||
class Translator;
|
||||
class Operand;
|
||||
class HLSLccReflection;
|
||||
|
||||
class HLSLCrossCompilerContext
|
||||
{
|
||||
public:
|
||||
HLSLCrossCompilerContext(HLSLccReflection &refl) : m_Reflection(refl) {}
|
||||
|
||||
bstring glsl;
|
||||
bstring extensions;
|
||||
|
||||
bstring* currentGLSLString;//either glsl or earlyMain of current phase
|
||||
|
||||
uint32_t currentPhase;
|
||||
|
||||
int indent;
|
||||
unsigned int flags;
|
||||
Shader* psShader;
|
||||
GLSLCrossDependencyData* psDependencies;
|
||||
const char *inputPrefix; // Prefix for shader inputs
|
||||
const char *outputPrefix; // Prefix for shader outputs
|
||||
|
||||
void DoDataTypeAnalysis(ShaderPhase *psPhase);
|
||||
|
||||
void ClearDependencyData();
|
||||
|
||||
void AddIndentation();
|
||||
|
||||
// Currently active translator
|
||||
Translator *psTranslator;
|
||||
|
||||
HLSLccReflection &m_Reflection; // Callbacks for bindings and diagnostic info
|
||||
|
||||
// Retrieve the name for which the input or output is declared as. Takes into account possible redirections.
|
||||
std::string GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const;
|
||||
std::string GetDeclaredOutputName(const Operand* psOperand, int* stream, uint32_t *puiIgnoreSwizzle, int *piRebase, int iIgnoreRedirect) const;
|
||||
|
||||
bool OutputNeedsDeclaring(const Operand* psOperand, const int count);
|
||||
|
||||
};
|
127
src/internal_includes/HLSLccToolkit.h
Normal file
127
src/internal_includes/HLSLccToolkit.h
Normal file
@ -0,0 +1,127 @@
|
||||
#pragma once
|
||||
#include "hlslcc.h"
|
||||
#include "bstrlib.h"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "internal_includes/Instruction.h"
|
||||
#include "internal_includes/Operand.h"
|
||||
|
||||
class HLSLCrossCompilerContext;
|
||||
|
||||
namespace HLSLcc
|
||||
{
|
||||
uint32_t GetNumberBitsSet(uint32_t a);
|
||||
|
||||
uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType);
|
||||
|
||||
SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags);
|
||||
|
||||
const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision = true);
|
||||
|
||||
const char * GetConstructorForTypeGLSL(const SHADER_VARIABLE_TYPE eType,
|
||||
const int components, bool useGLSLPrecision);
|
||||
|
||||
const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType,
|
||||
const int components);
|
||||
|
||||
std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows);
|
||||
|
||||
void AddSwizzleUsingElementCount(bstring dest, uint32_t count);
|
||||
|
||||
int WriteMaskToComponentCount(uint32_t writeMask);
|
||||
|
||||
uint32_t BuildComponentMaskFromElementCount(int count);
|
||||
|
||||
// Returns true if we can do direct assignment between types (mostly for mediump<->highp floats etc)
|
||||
bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src);
|
||||
|
||||
// Convert resource return type to SVT_ flags
|
||||
uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType);
|
||||
|
||||
SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec);
|
||||
|
||||
uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount);
|
||||
|
||||
bool IsOperationCommutative(int /* OPCODE_TYPE */ eOpCode);
|
||||
|
||||
bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB);
|
||||
|
||||
int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim);
|
||||
|
||||
SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b);
|
||||
|
||||
// Returns true if the instruction adds 1 to the destination temp register
|
||||
bool IsAddOneInstruction(const Instruction *psInst);
|
||||
|
||||
bool CanDoDirectCast(SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest);
|
||||
|
||||
// Helper function to print floats with full precision
|
||||
void PrintFloat(bstring b, float f);
|
||||
|
||||
// Flags for ForeachOperand
|
||||
// Process suboperands
|
||||
#define FEO_FLAG_SUBOPERAND 1
|
||||
// Process src operands
|
||||
#define FEO_FLAG_SRC_OPERAND 2
|
||||
// Process destination operands
|
||||
#define FEO_FLAG_DEST_OPERAND 4
|
||||
// Convenience: Process all operands, both src and dest, and all suboperands
|
||||
#define FEO_FLAG_ALL (FEO_FLAG_SUBOPERAND | FEO_FLAG_SRC_OPERAND | FEO_FLAG_DEST_OPERAND)
|
||||
|
||||
// For_each for all operands within a range of instructions. Flags above.
|
||||
template<typename ItrType, typename F> void ForEachOperand(ItrType _begin, ItrType _end, int flags, F callback)
|
||||
{
|
||||
ItrType inst = _begin;
|
||||
while (inst != _end)
|
||||
{
|
||||
uint32_t i, k;
|
||||
|
||||
if ((flags & FEO_FLAG_DEST_OPERAND) || (flags & FEO_FLAG_SUBOPERAND))
|
||||
{
|
||||
for (i = 0; i < inst->ui32FirstSrc; i++)
|
||||
{
|
||||
if (flags & FEO_FLAG_SUBOPERAND)
|
||||
{
|
||||
for (k = 0; k < MAX_SUB_OPERANDS; k++)
|
||||
{
|
||||
if (inst->asOperands[i].m_SubOperands[k].get())
|
||||
{
|
||||
callback(inst, inst->asOperands[i].m_SubOperands[k].get(), FEO_FLAG_SUBOPERAND);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (flags & FEO_FLAG_DEST_OPERAND)
|
||||
{
|
||||
callback(inst, &inst->asOperands[i], FEO_FLAG_DEST_OPERAND);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((flags & FEO_FLAG_SRC_OPERAND) || (flags & FEO_FLAG_SUBOPERAND))
|
||||
{
|
||||
for (i = inst->ui32FirstSrc; i < inst->ui32NumOperands; i++)
|
||||
{
|
||||
if (flags & FEO_FLAG_SUBOPERAND)
|
||||
{
|
||||
for (k = 0; k < MAX_SUB_OPERANDS; k++)
|
||||
{
|
||||
if (inst->asOperands[i].m_SubOperands[k].get())
|
||||
{
|
||||
callback(inst, inst->asOperands[i].m_SubOperands[k].get(), FEO_FLAG_SUBOPERAND);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (flags & FEO_FLAG_SRC_OPERAND)
|
||||
{
|
||||
callback(inst, &inst->asOperands[i], FEO_FLAG_SRC_OPERAND);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inst++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
};
|
134
src/internal_includes/Instruction.h
Normal file
134
src/internal_includes/Instruction.h
Normal file
@ -0,0 +1,134 @@
|
||||
#pragma once
|
||||
|
||||
#include "internal_includes/Operand.h"
|
||||
#include "internal_includes/tokens.h"
|
||||
#include "include/ShaderInfo.h"
|
||||
#include <memory>
|
||||
|
||||
// Atomic addressing kinds (plain enumeration values, not a bit mask).
#define ATOMIC_ADDRESS_BASIC 0
#define ATOMIC_ADDRESS_ARRAY_DYNAMIC 1
#define ATOMIC_ADDRESS_STRUCT_DYNAMIC 2

// Texture sampling flags (bit mask).
#define TEXSMP_FLAG_NONE 0x0
#define TEXSMP_FLAG_LOD 0x1 // LOD comes from operand
#define TEXSMP_FLAG_DEPTHCOMPARE 0x2
#define TEXSMP_FLAG_FIRSTLOD 0x4 // LOD is 0
#define TEXSMP_FLAG_BIAS 0x8
#define TEXSMP_FLAG_GRAD 0x10
// Gather specific flags
#define TEXSMP_FLAG_GATHER 0x20
#define TEXSMP_FLAG_PARAMOFFSET 0x40 // Offset comes from operand
|
||||
|
||||
struct Instruction
|
||||
{
|
||||
Instruction()
|
||||
: eOpcode(OPCODE_NOP)
|
||||
, eBooleanTestType(INSTRUCTION_TEST_ZERO)
|
||||
, ui32NumOperands(0)
|
||||
, ui32FirstSrc(0)
|
||||
, m_Uses()
|
||||
, m_SkipTranslation(false)
|
||||
, m_InductorRegister(0)
|
||||
, bSaturate(0)
|
||||
{
|
||||
m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0;
|
||||
}
|
||||
|
||||
// For creating unit tests only. Create an instruction with temps (unless reg is 0xffffffff in which case use OPERAND_TYPE_INPUT/OUTPUT)
|
||||
Instruction(uint64_t _id, OPCODE_TYPE opcode, uint32_t reg1 = 0, uint32_t reg1Mask = 0, uint32_t reg2 = 0, uint32_t reg2Mask = 0, uint32_t reg3 = 0, uint32_t reg3Mask = 0, uint32_t reg4 = 0, uint32_t reg4Mask = 0)
|
||||
{
|
||||
id = _id;
|
||||
eOpcode = opcode;
|
||||
eBooleanTestType = INSTRUCTION_TEST_ZERO;
|
||||
ui32FirstSrc = 0;
|
||||
ui32NumOperands = 0;
|
||||
m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0;
|
||||
m_SkipTranslation = false;
|
||||
m_InductorRegister = 0;
|
||||
|
||||
if (reg1Mask == 0)
|
||||
return;
|
||||
|
||||
ui32NumOperands++;
|
||||
asOperands[0].eType = reg1 == 0xffffffff ? OPERAND_TYPE_OUTPUT : OPERAND_TYPE_TEMP;
|
||||
asOperands[0].ui32RegisterNumber = reg1 == 0xffffffff ? 0 : reg1;
|
||||
asOperands[0].ui32CompMask = reg1Mask;
|
||||
asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
|
||||
|
||||
if (reg2Mask == 0)
|
||||
return;
|
||||
|
||||
ui32FirstSrc = 1;
|
||||
ui32NumOperands++;
|
||||
|
||||
asOperands[1].eType = reg2 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP;
|
||||
asOperands[1].ui32RegisterNumber = reg2 == 0xffffffff ? 0 : reg2;
|
||||
asOperands[1].ui32CompMask = reg2Mask;
|
||||
asOperands[1].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
|
||||
|
||||
if (reg3Mask == 0)
|
||||
return;
|
||||
ui32NumOperands++;
|
||||
|
||||
asOperands[2].eType = reg3 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP;
|
||||
asOperands[2].ui32RegisterNumber = reg3 == 0xffffffff ? 0 : reg3;
|
||||
asOperands[2].ui32CompMask = reg3Mask;
|
||||
asOperands[2].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
|
||||
|
||||
if (reg4Mask == 0)
|
||||
return;
|
||||
ui32NumOperands++;
|
||||
|
||||
asOperands[3].eType = reg4 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP;
|
||||
asOperands[3].ui32RegisterNumber = reg4 == 0xffffffff ? 0 : reg4;
|
||||
asOperands[3].ui32CompMask = reg4Mask;
|
||||
asOperands[3].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
|
||||
}
|
||||
|
||||
|
||||
bool IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const;
|
||||
|
||||
// Flags for ChangeOperandTempRegister
|
||||
#define UD_CHANGE_SUBOPERANDS 1
|
||||
#define UD_CHANGE_MAIN_OPERAND 2
|
||||
#define UD_CHANGE_ALL 3
|
||||
|
||||
void ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase);
|
||||
|
||||
|
||||
OPCODE_TYPE eOpcode;
|
||||
INSTRUCTION_TEST_BOOLEAN eBooleanTestType;
|
||||
uint32_t ui32SyncFlags;
|
||||
uint32_t ui32NumOperands;
|
||||
uint32_t ui32FirstSrc;
|
||||
Operand asOperands[6];
|
||||
uint32_t bSaturate;
|
||||
uint32_t ui32FuncIndexWithinInterface;
|
||||
RESINFO_RETURN_TYPE eResInfoReturnType;
|
||||
|
||||
int bAddressOffset;
|
||||
int8_t iUAddrOffset;
|
||||
int8_t iVAddrOffset;
|
||||
int8_t iWAddrOffset;
|
||||
RESOURCE_RETURN_TYPE xType, yType, zType, wType;
|
||||
RESOURCE_DIMENSION eResDim;
|
||||
int8_t iCausedSplit; // Nonzero if has caused a temp split. Later used by sampler datatype tweaking
|
||||
|
||||
struct Use
|
||||
{
|
||||
Use() : m_Inst(0), m_Op(0) {}
|
||||
Use(const Use &a) : m_Inst(a.m_Inst), m_Op(a.m_Op) {}
|
||||
Use(Instruction *inst, Operand *op) : m_Inst(inst), m_Op(op) {}
|
||||
|
||||
Instruction *m_Inst; // The instruction that references the result of this instruction
|
||||
Operand *m_Op; // The operand within the instruction above. Note: can also be suboperand.
|
||||
};
|
||||
|
||||
std::vector<Use> m_Uses; // Array of use sites for the result(s) of this instruction, if any of the results is a temp reg.
|
||||
|
||||
Instruction *m_LoopInductors[4]; // If OPCODE_LOOP and is suitable for transforming into for-loop, contains pointers to for initializer, end condition, breakc, and increment.
|
||||
bool m_SkipTranslation; // If true, don't emit this instruction (currently used by the for loop translation)
|
||||
uint32_t m_InductorRegister; // If non-zero, the inductor variable can be declared in the for statement, and this register number has been allocated for it
|
||||
|
||||
uint64_t id;
|
||||
};
|
9
src/internal_includes/LoopTransform.h
Normal file
9
src/internal_includes/LoopTransform.h
Normal file
@ -0,0 +1,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
class ShaderPhase;
|
||||
|
||||
namespace HLSLcc
|
||||
{
|
||||
void DoLoopTransform(ShaderPhase &phase);
|
||||
};
|
152
src/internal_includes/Operand.h
Normal file
152
src/internal_includes/Operand.h
Normal file
@ -0,0 +1,152 @@
|
||||
#pragma once
|
||||
|
||||
#include "internal_includes/tokens.h"
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <tr1/memory>
|
||||
#endif
|
||||
|
||||
enum{ MAX_SUB_OPERANDS = 3 };
|
||||
class Operand;
|
||||
class HLSLCrossCompilerContext;
|
||||
struct Instruction;
|
||||
|
||||
#if _MSC_VER
|
||||
// We want to disable the "array will be default-initialized" warning, as that's exactly what we want
|
||||
#pragma warning(disable: 4351)
|
||||
#endif
|
||||
|
||||
class Operand
|
||||
{
|
||||
public:
|
||||
#ifdef __APPLE__
|
||||
// Herp derp Apple is stuck in 2005
|
||||
typedef std::tr1::shared_ptr<Operand> SubOperandPtr;
|
||||
#else
|
||||
typedef std::shared_ptr<Operand> SubOperandPtr;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
Operand()
|
||||
:
|
||||
iExtended(),
|
||||
eType(),
|
||||
eModifier(),
|
||||
eMinPrecision(),
|
||||
iIndexDims(),
|
||||
iWriteMask(),
|
||||
iGSInput(),
|
||||
iPSInOut(),
|
||||
iWriteMaskEnabled(),
|
||||
iArrayElements(),
|
||||
iNumComponents(),
|
||||
eSelMode(),
|
||||
ui32CompMask(),
|
||||
ui32Swizzle(),
|
||||
aui32Swizzle(),
|
||||
aui32ArraySizes(),
|
||||
ui32RegisterNumber(),
|
||||
afImmediates(),
|
||||
adImmediates(),
|
||||
eSpecialName(),
|
||||
specialName(),
|
||||
eIndexRep(),
|
||||
m_SubOperands(),
|
||||
aeDataType(),
|
||||
m_Rebase(0),
|
||||
m_Size(0),
|
||||
m_Defines(),
|
||||
m_ForLoopInductorName(0)
|
||||
#ifdef _DEBUG
|
||||
, id(0)
|
||||
#endif
|
||||
{}
|
||||
|
||||
// Retrieve the mask of all the components this operand accesses (either reads from or writes to).
|
||||
// Note that destination writemask does affect the effective access mask.
|
||||
uint32_t GetAccessMask() const;
|
||||
|
||||
// Returns the index of the highest accessed component, based on component mask
|
||||
int GetMaxComponent() const;
|
||||
|
||||
bool IsSwizzleReplicated() const;
|
||||
|
||||
// Get the number of elements returned by operand, taking additional component mask into account
|
||||
//e.g.
|
||||
//.z = 1
|
||||
//.x = 1
|
||||
//.yw = 2
|
||||
uint32_t GetNumSwizzleElements(uint32_t ui32CompMask = OPERAND_4_COMPONENT_MASK_ALL) const;
|
||||
|
||||
// When this operand is used as an input declaration, how many components does it have?
|
||||
int GetNumInputElements(const HLSLCrossCompilerContext *psContext) const;
|
||||
|
||||
// Retrieve the operand data type.
|
||||
SHADER_VARIABLE_TYPE GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates = SVT_INT) const;
|
||||
|
||||
// Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch
|
||||
int GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const;
|
||||
// Same as above but with explicit shader type and phase
|
||||
int GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const;
|
||||
|
||||
// Maps REFLECT_RESOURCE_PRECISION into OPERAND_MIN_PRECISION as much as possible
|
||||
static OPERAND_MIN_PRECISION ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec);
|
||||
|
||||
int iExtended;
|
||||
OPERAND_TYPE eType;
|
||||
OPERAND_MODIFIER eModifier;
|
||||
OPERAND_MIN_PRECISION eMinPrecision;
|
||||
int iIndexDims;
|
||||
int iWriteMask;
|
||||
int iGSInput;
|
||||
int iPSInOut;
|
||||
int iWriteMaskEnabled;
|
||||
int iArrayElements;
|
||||
int iNumComponents;
|
||||
|
||||
OPERAND_4_COMPONENT_SELECTION_MODE eSelMode;
|
||||
uint32_t ui32CompMask;
|
||||
uint32_t ui32Swizzle;
|
||||
uint32_t aui32Swizzle[4];
|
||||
|
||||
uint32_t aui32ArraySizes[3];
|
||||
uint32_t ui32RegisterNumber;
|
||||
//If eType is OPERAND_TYPE_IMMEDIATE32
|
||||
float afImmediates[4];
|
||||
//If eType is OPERAND_TYPE_IMMEDIATE64
|
||||
double adImmediates[4];
|
||||
|
||||
SPECIAL_NAME eSpecialName;
|
||||
std::string specialName;
|
||||
|
||||
OPERAND_INDEX_REPRESENTATION eIndexRep[3];
|
||||
|
||||
SubOperandPtr m_SubOperands[MAX_SUB_OPERANDS];
|
||||
|
||||
//One type for each component.
|
||||
SHADER_VARIABLE_TYPE aeDataType[4];
|
||||
|
||||
uint32_t m_Rebase; // Rebase value, for constant array accesses.
|
||||
uint32_t m_Size; // Component count, only for constant array access.
|
||||
|
||||
struct Define
|
||||
{
|
||||
Define() : m_Inst(0), m_Op(0) {}
|
||||
Define(const Define &a) : m_Inst(a.m_Inst), m_Op(a.m_Op) {}
|
||||
Define(Instruction *inst, Operand *op) : m_Inst(inst), m_Op(op) {}
|
||||
|
||||
Instruction *m_Inst; // Instruction that writes to the temp
|
||||
Operand *m_Op; // The (destination) operand within that instruction.
|
||||
};
|
||||
|
||||
std::vector<Define> m_Defines; // Array of instructions whose results this operand can use. (only if eType == OPERAND_TYPE_TEMP)
|
||||
uint32_t m_ForLoopInductorName; // If non-zero, this (eType==OPERAND_TYPE_TEMP) is an inductor variable used in for loop, and it has a special number as given here (overrides ui32RegisterNumber)
|
||||
|
||||
#ifdef _DEBUG
|
||||
uint64_t id;
|
||||
#endif
|
||||
};
|
||||
|
267
src/internal_includes/Shader.h
Normal file
267
src/internal_includes/Shader.h
Normal file
@ -0,0 +1,267 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
#include "growing_array.h"
|
||||
#include "internal_includes/tokens.h"
|
||||
#include "internal_includes/reflect.h"
|
||||
#include "include/ShaderInfo.h"
|
||||
#include "internal_includes/Instruction.h"
|
||||
#include "internal_includes/Declaration.h"
|
||||
#include "internal_includes/ControlFlowGraph.h"
|
||||
#include "bstrlib.h"
|
||||
|
||||
struct ConstantArrayChunk
|
||||
{
|
||||
ConstantArrayChunk() : m_Size(0), m_AccessMask(0) {}
|
||||
ConstantArrayChunk(uint32_t sz, uint32_t mask, Operand *firstUse)
|
||||
: m_Size(sz), m_AccessMask(mask)
|
||||
{
|
||||
m_UseSites.push_back(firstUse);
|
||||
}
|
||||
|
||||
uint32_t m_Size;
|
||||
uint32_t m_AccessMask;
|
||||
uint32_t m_Rebase;
|
||||
uint32_t m_ComponentCount;
|
||||
|
||||
std::vector<Operand *> m_UseSites;
|
||||
};
|
||||
typedef std::multimap<uint32_t, ConstantArrayChunk> ChunkMap;
|
||||
|
||||
struct ConstantArrayInfo
|
||||
{
|
||||
ConstantArrayInfo() : m_OrigDeclaration(0), m_Chunks() {}
|
||||
|
||||
Declaration *m_OrigDeclaration; // Pointer to the original declaration of the const array
|
||||
ChunkMap m_Chunks; // map of <starting offset, chunk info>, same start offset might have multiple entries for different access masks
|
||||
};
|
||||
|
||||
class ShaderPhase
|
||||
{
|
||||
public:
|
||||
ShaderPhase()
|
||||
:
|
||||
ePhase(MAIN_PHASE),
|
||||
ui32InstanceCount(0),
|
||||
postShaderCode(),
|
||||
hasPostShaderCode(0),
|
||||
earlyMain(),
|
||||
ui32OrigTemps(0),
|
||||
ui32TotalTemps(0),
|
||||
psTempDeclaration(NULL),
|
||||
pui32SplitInfo(),
|
||||
peTempTypes(),
|
||||
acInputNeedsRedirect(),
|
||||
acOutputNeedsRedirect(),
|
||||
acPatchConstantsNeedsRedirect(),
|
||||
m_CFG(),
|
||||
m_CFGInitialized(false),
|
||||
m_NextFreeTempRegister(1),
|
||||
m_NextTexCoordTemp(0)
|
||||
{}
|
||||
|
||||
void ResolveUAVProperties();
|
||||
|
||||
void UnvectorizeImmMoves(); // Transform MOV tX.xyz, (0, 1, 2) into MOV tX.x, 0; MOV tX.y, 1; MOV tX.z, 2 to make datatype analysis easier
|
||||
|
||||
void PruneConstArrays(); // Walk through everything that accesses a const array to see if we could make it smaller
|
||||
|
||||
void ExpandSWAPCs(); // Expand all SWAPC opcodes into a bunch of MOVCs. Must be done first!
|
||||
|
||||
ConstantArrayInfo m_ConstantArrayInfo;
|
||||
|
||||
std::vector<Declaration> psDecl;
|
||||
std::vector<Instruction> psInst;
|
||||
|
||||
SHADER_PHASE_TYPE ePhase;
|
||||
uint32_t ui32InstanceCount; // In case of hull shaders, how many instances this phase needs to have. Defaults to 1.
|
||||
bstring postShaderCode;//End of main or before emit()
|
||||
int hasPostShaderCode;
|
||||
|
||||
bstring earlyMain;//Code to be inserted at the start of phase
|
||||
|
||||
uint32_t ui32OrigTemps; // The number of temporaries this phase originally declared
|
||||
uint32_t ui32TotalTemps; // The number of temporaries this phase has now
|
||||
Declaration *psTempDeclaration; // Shortcut to the OPCODE_DCL_TEMPS opcode
|
||||
|
||||
// The split table is a table containing the index of the original register this register was split out from, or 0xffffffff
|
||||
// Format: lowest 16 bits: original register. bits 16-23: rebase (eg value of 1 means .yzw was changed to .xyz): bits 24-31: component count
|
||||
std::vector<uint32_t> pui32SplitInfo;
|
||||
std::vector<SHADER_VARIABLE_TYPE> peTempTypes;
|
||||
|
||||
// These are needed in cases we have 2 vec2 texcoords combined into one vec4 and they are accessed together.
|
||||
std::vector<unsigned char> acInputNeedsRedirect; // If 0xff, requires re-routing all reads via a combined vec4. If 0xfe, the same but the vec4 has already been declared.
|
||||
std::vector<unsigned char> acOutputNeedsRedirect; // Same for outputs
|
||||
std::vector<unsigned char> acPatchConstantsNeedsRedirect; // Same for patch constants
|
||||
|
||||
// Get the Control Flow Graph for this phase, build it if necessary.
|
||||
HLSLcc::ControlFlow::ControlFlowGraph &GetCFG();
|
||||
|
||||
uint32_t m_NextFreeTempRegister; // A counter for creating new temporaries for for-loops.
|
||||
uint32_t m_NextTexCoordTemp; // A counter for creating tex coord temps for driver issue workarounds
|
||||
|
||||
private:
|
||||
bool m_CFGInitialized;
|
||||
HLSLcc::ControlFlow::ControlFlowGraph m_CFG;
|
||||
};
|
||||
|
||||
class Shader
|
||||
{
|
||||
public:
|
||||
|
||||
Shader()
|
||||
:
|
||||
ui32MajorVersion(0),
|
||||
ui32MinorVersion(0),
|
||||
eShaderType(INVALID_SHADER),
|
||||
eTargetLanguage(LANG_DEFAULT),
|
||||
extensions(0),
|
||||
fp64(0),
|
||||
ui32ShaderLength(0),
|
||||
aui32FuncTableToFuncPointer(),
|
||||
aui32FuncBodyToFuncTable(),
|
||||
funcTable(),
|
||||
funcPointer(),
|
||||
ui32NextClassFuncName(),
|
||||
pui32FirstToken(NULL),
|
||||
asPhases(),
|
||||
sInfo(),
|
||||
abScalarInput(),
|
||||
abScalarOutput(),
|
||||
aIndexedInput(),
|
||||
aIndexedOutput(),
|
||||
aIndexedInputParents(),
|
||||
aeResourceDims(),
|
||||
acInputDeclared(),
|
||||
acOutputDeclared(),
|
||||
aiOpcodeUsed(NUM_OPCODES, 0),
|
||||
ui32CurrentVertexOutputStream(0),
|
||||
textureSamplers(),
|
||||
aui32StructuredBufferBindingPoints(MAX_RESOURCE_BINDINGS, 0),
|
||||
ui32CurrentStructuredBufferIndex(),
|
||||
m_CubemapArrayExtensionDeclared(false),
|
||||
m_TextureBufferExtensionDeclared(false),
|
||||
m_ClipDistanceExtensionDeclared(false)
|
||||
{
|
||||
}
|
||||
|
||||
// Retrieve the number of components the temp register has.
|
||||
uint32_t GetTempComponentCount(SHADER_VARIABLE_TYPE eType, uint32_t ui32Reg) const;
|
||||
|
||||
//Hull shaders have multiple phases.
|
||||
//Each phase has its own temps.
|
||||
//Convert from per-phase temps to global temps.
|
||||
void ConsolidateHullTempVars();
|
||||
|
||||
// Go through all declarations and remove UAV occupied binding points from the aui32StructuredBufferBindingPoints list
|
||||
void ResolveStructuredBufferBindingSlots(ShaderPhase *psPhase);
|
||||
|
||||
// HLSL has separate register spaces for UAV and structured buffers. GLSL has shared register space for all buffers.
|
||||
// The aim here is to preserve the UAV buffer bindings as they are and use remaining binding points for structured buffers.
|
||||
// In this step make aui32StructuredBufferBindingPoints contain increasingly ordered uints starting from zero.
|
||||
void PrepareStructuredBufferBindingSlots();
|
||||
|
||||
// Detect temp registers per data type that are actually used.
|
||||
void PruneTempRegisters();
|
||||
|
||||
// Check if inputs and outputs are accessed across semantic boundaries
|
||||
// as in, 2x texcoord vec2's are packed together as vec4 but still accessed together.
|
||||
void AnalyzeIOOverlap();
|
||||
|
||||
// Change all references to vertex position to always be highp, having them be mediump causes problems on Metal and Vivante GPUs.
|
||||
void ForcePositionToHighp();
|
||||
|
||||
void FindUnusedGlobals(uint32_t flags); // Finds the DCL_CONSTANT_BUFFER with name "$Globals" and searches through all usages for each member of it and mark if they're actually ever used.
|
||||
|
||||
void ExpandSWAPCs();
|
||||
|
||||
uint32_t ui32MajorVersion;
|
||||
uint32_t ui32MinorVersion;
|
||||
SHADER_TYPE eShaderType;
|
||||
|
||||
GLLang eTargetLanguage;
|
||||
const struct GlExtensions *extensions;
|
||||
|
||||
int fp64;
|
||||
|
||||
//DWORDs in program code, including version and length tokens.
|
||||
uint32_t ui32ShaderLength;
|
||||
|
||||
|
||||
//Instruction* functions;//non-main subroutines
|
||||
HLSLcc::growing_vector<uint32_t> aui32FuncTableToFuncPointer; // dynamic alloc?
|
||||
HLSLcc::growing_vector<uint32_t> aui32FuncBodyToFuncTable;
|
||||
|
||||
struct FuncTableEntry{
|
||||
HLSLcc::growing_vector<uint32_t> aui32FuncBodies;
|
||||
};
|
||||
HLSLcc::growing_vector<FuncTableEntry> funcTable;
|
||||
|
||||
struct FuncPointerEntry {
|
||||
HLSLcc::growing_vector<uint32_t> aui32FuncTables;
|
||||
uint32_t ui32NumBodiesPerTable;
|
||||
};
|
||||
|
||||
HLSLcc::growing_vector<FuncPointerEntry> funcPointer;
|
||||
|
||||
HLSLcc::growing_vector<uint32_t> ui32NextClassFuncName;
|
||||
|
||||
const uint32_t* pui32FirstToken;//Reference for calculating current position in token stream.
|
||||
|
||||
std::vector<ShaderPhase> asPhases;
|
||||
|
||||
ShaderInfo sInfo;
|
||||
|
||||
// There are 2 input/output register spaces in DX bytecode: one for per-patch data and one for per-vertex.
|
||||
// Which one is used depends on the context:
|
||||
// per-vertex space is used in vertex/pixel/geom shaders always
|
||||
// hull shader control point phase uses per-vertex by default, other phases are per-patch by default (can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT)
|
||||
// domain shader is per-patch by default, can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT
|
||||
|
||||
// Below, the [2] is accessed with 0 == per-vertex, 1 == per-patch
|
||||
// Note that these ints are component masks
|
||||
HLSLcc::growing_vector<int> abScalarInput[2];
|
||||
HLSLcc::growing_vector<int> abScalarOutput[2];
|
||||
|
||||
HLSLcc::growing_vector<int> aIndexedInput[2];
|
||||
HLSLcc::growing_vector<bool> aIndexedOutput[2];
|
||||
|
||||
HLSLcc::growing_vector<int> aIndexedInputParents[2];
|
||||
|
||||
HLSLcc::growing_vector<RESOURCE_DIMENSION> aeResourceDims;
|
||||
|
||||
HLSLcc::growing_vector<char> acInputDeclared[2];
|
||||
HLSLcc::growing_vector<char> acOutputDeclared[2];
|
||||
|
||||
std::vector<int> aiOpcodeUsed; // Initialized to NUM_OPCODES elements above.
|
||||
|
||||
uint32_t ui32CurrentVertexOutputStream;
|
||||
|
||||
TextureSamplerPairs textureSamplers;
|
||||
|
||||
std::vector<uint32_t> aui32StructuredBufferBindingPoints;
|
||||
uint32_t ui32CurrentStructuredBufferIndex;
|
||||
|
||||
bool m_CubemapArrayExtensionDeclared;
|
||||
bool m_TextureBufferExtensionDeclared;
|
||||
bool m_ClipDistanceExtensionDeclared;
|
||||
|
||||
std::vector<char> psIntTempSizes; // Array for whether this temp register needs declaration as int temp
|
||||
std::vector<char> psInt16TempSizes; // min16ints
|
||||
std::vector<char> psInt12TempSizes; // min12ints
|
||||
std::vector<char> psUIntTempSizes; // Same for uints
|
||||
std::vector<char> psUInt16TempSizes; // ... and for uint16's
|
||||
std::vector<char> psFloatTempSizes; // ...and for floats
|
||||
std::vector<char> psFloat16TempSizes; // ...and for min16floats
|
||||
std::vector<char> psFloat10TempSizes; // ...and for min10floats
|
||||
std::vector<char> psDoubleTempSizes; // ...and for doubles
|
||||
std::vector<char> psBoolTempSizes; // ... and for bools
|
||||
|
||||
private:
|
||||
void DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand);
|
||||
|
||||
};
|
35
src/internal_includes/Translator.h
Normal file
35
src/internal_includes/Translator.h
Normal file
@ -0,0 +1,35 @@
|
||||
|
||||
#pragma once
|
||||
#include "HLSLCrossCompilerContext.h"
|
||||
#include "Shader.h"
|
||||
|
||||
struct Declaration;
|
||||
// Base class for translator backend implenentations.
|
||||
class Translator
|
||||
{
|
||||
protected:
|
||||
HLSLCrossCompilerContext *psContext;
|
||||
public:
|
||||
explicit Translator(HLSLCrossCompilerContext *ctx) : psContext(ctx) {}
|
||||
virtual ~Translator() {}
|
||||
|
||||
virtual bool Translate() = 0;
|
||||
|
||||
virtual void TranslateDeclaration(const Declaration *psDecl) = 0;
|
||||
|
||||
// Translate system value type to name, return true if succeeded and no further translation is necessary
|
||||
virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL) = 0;
|
||||
|
||||
// In GLSL, the input and output names cannot clash.
|
||||
// Also, the output name of previous stage must match the input name of the next stage.
|
||||
// So, do gymnastics depending on which shader we're running on and which other shaders exist in this program.
|
||||
//
|
||||
virtual void SetIOPrefixes() = 0;
|
||||
|
||||
void SetExtensions(const struct GlExtensions *ext)
|
||||
{
|
||||
psContext->psShader->extensions = ext;
|
||||
}
|
||||
|
||||
|
||||
};
|
141
src/internal_includes/UseDefineChains.h
Normal file
141
src/internal_includes/UseDefineChains.h
Normal file
@ -0,0 +1,141 @@
|
||||
#pragma once
|
||||
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <list>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
struct DefineUseChainEntry;
|
||||
struct UseDefineChainEntry;
|
||||
|
||||
typedef std::set<DefineUseChainEntry *> DefineSet;
|
||||
typedef std::set<UseDefineChainEntry *> UsageSet;
|
||||
|
||||
struct Instruction;
|
||||
class Operand;
|
||||
class ShaderInfo;
|
||||
// Forward declaration only: lets this header name ControlFlowGraph by reference
// without pulling in its definition.
namespace HLSLcc
{
namespace ControlFlow
{
    class ControlFlowGraph;
} // namespace ControlFlow
} // namespace HLSLcc
|
||||
|
||||
|
||||
// Def-Use chain per temp component
|
||||
struct DefineUseChainEntry
|
||||
{
|
||||
DefineUseChainEntry()
|
||||
: psInst(0)
|
||||
, psOp(0)
|
||||
, usages()
|
||||
, writeMask(0)
|
||||
, index(0)
|
||||
, isStandalone(0)
|
||||
{
|
||||
memset(psSiblings, 0, 4 * sizeof(DefineUseChainEntry *));
|
||||
}
|
||||
|
||||
Instruction *psInst; // The declaration (write to this temp component)
|
||||
Operand *psOp; // The operand within this instruction for the write target
|
||||
UsageSet usages; // List of usages that are dependent on this write
|
||||
uint32_t writeMask; // Access mask; which all components were written to in the same op
|
||||
uint32_t index; // For which component was this definition created for?
|
||||
uint32_t isStandalone; // A shortcut for analysis: if nonzero, all siblings of all usages for both this and all this siblings
|
||||
struct DefineUseChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this define's corresponding entries for the other components.
|
||||
|
||||
#if _DEBUG
|
||||
bool operator==(const DefineUseChainEntry &a) const
|
||||
{
|
||||
if (psInst != a.psInst)
|
||||
return false;
|
||||
if (psOp != a.psOp)
|
||||
return false;
|
||||
if (writeMask != a.writeMask)
|
||||
return false;
|
||||
if (index != a.index)
|
||||
return false;
|
||||
if (isStandalone != a.isStandalone)
|
||||
return false;
|
||||
|
||||
// Just check that each one has the same amount of usages
|
||||
if (usages.size() != a.usages.size())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
typedef std::list<DefineUseChainEntry> DefineUseChain;
|
||||
|
||||
struct UseDefineChainEntry
|
||||
{
|
||||
UseDefineChainEntry()
|
||||
: psInst(0)
|
||||
, psOp(0)
|
||||
, defines()
|
||||
, accessMask(0)
|
||||
, index(0)
|
||||
{
|
||||
memset(psSiblings, 0, 4 * sizeof(UseDefineChainEntry *));
|
||||
}
|
||||
|
||||
Instruction *psInst; // The use (read from this temp component)
|
||||
Operand *psOp; // The operand within this instruction for the read
|
||||
DefineSet defines; // List of writes that are visible to this read
|
||||
uint32_t accessMask; // Which all components were read together with this one
|
||||
uint32_t index; // For which component was this usage created for?
|
||||
struct UseDefineChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this usage's corresponding entries for the other components.
|
||||
|
||||
#if _DEBUG
|
||||
bool operator==(const UseDefineChainEntry &a) const
|
||||
{
|
||||
if (psInst != a.psInst)
|
||||
return false;
|
||||
if (psOp != a.psOp)
|
||||
return false;
|
||||
if (accessMask != a.accessMask)
|
||||
return false;
|
||||
if (index != a.index)
|
||||
return false;
|
||||
|
||||
// Just check that each one has the same amount of usages
|
||||
if (defines.size() != a.defines.size())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
typedef std::list<UseDefineChainEntry> UseDefineChain;
|
||||
|
||||
typedef std::map<uint32_t, UseDefineChain> UseDefineChains;
|
||||
typedef std::map<uint32_t, DefineUseChain> DefineUseChains;
|
||||
typedef std::vector<DefineUseChainEntry *> ActiveDefinitions;
|
||||
|
||||
// Do flow control analysis on the instructions and build the define-use and use-define chains
|
||||
void BuildUseDefineChains(std::vector<Instruction> &instructions, uint32_t ui32NumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, HLSLcc::ControlFlow::ControlFlowGraph &cfg);
|
||||
|
||||
// Do temp splitting based on use-define chains
|
||||
void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable);
|
||||
|
||||
// Based on the sampler precisions, downgrade the definitions if possible.
|
||||
void UpdateSamplerPrecisions(const ShaderInfo &psContext, DefineUseChains &psDUChains, uint32_t ui32NumTemps);
|
||||
|
||||
// Optimization pass for successive passes: Mark DefineUseChainEntry::isStandalone for definitions that are "standalone": all usages (and all their siblings) of this and all its siblings only see this definition.
|
||||
void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32NumTemps);
|
||||
|
||||
// Write the uses and defines back to Instruction and Operand member lists.
|
||||
void WriteBackUsesAndDefines(DefineUseChains &psDUChains);
|
||||
|
18
src/internal_includes/debug.h
Normal file
18
src/internal_includes/debug.h
Normal file
@ -0,0 +1,18 @@
|
||||
#ifndef DEBUG_H_
|
||||
#define DEBUG_H_
|
||||
|
||||
#ifdef _DEBUG
|
||||
#include "assert.h"
|
||||
#define ASSERT(expr) CustomAssert(expr)
|
||||
// Backs the ASSERT() macro: trips assert(0) when `expression` is zero and does
// nothing otherwise.
static void CustomAssert(int expression)
{
    if (expression)
        return;

    assert(0);
}
|
||||
#else
|
||||
#define ASSERT(expr)
|
||||
#endif
|
||||
|
||||
#endif
|
10
src/internal_includes/decode.h
Normal file
10
src/internal_includes/decode.h
Normal file
@ -0,0 +1,10 @@
|
||||
#ifndef DECODE_H
|
||||
#define DECODE_H
|
||||
|
||||
#include "internal_includes/Shader.h"
|
||||
|
||||
Shader* DecodeDXBC(uint32_t* data, uint32_t decodeFlags);
|
||||
|
||||
void UpdateOperandReferences(Shader* psShader, SHADER_PHASE_TYPE eShaderPhaseType, Instruction* psInst);
|
||||
|
||||
#endif
|
249
src/internal_includes/languages.h
Normal file
249
src/internal_includes/languages.h
Normal file
@ -0,0 +1,249 @@
|
||||
#ifndef LANGUAGES_H
|
||||
#define LANGUAGES_H
|
||||
|
||||
#include "hlslcc.h"
|
||||
|
||||
static int InOutSupported(const GLLang eLang)
|
||||
{
|
||||
if(eLang == LANG_ES_100 || eLang == LANG_120)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int WriteToFragData(const GLLang eLang)
|
||||
{
|
||||
if(eLang == LANG_ES_100 || eLang == LANG_120)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ShaderBitEncodingSupported(const GLLang eLang)
|
||||
{
|
||||
if( eLang != LANG_ES_300 &&
|
||||
eLang != LANG_ES_310 &&
|
||||
eLang < LANG_330)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int HaveOverloadedTextureFuncs(const GLLang eLang)
|
||||
{
|
||||
if(eLang == LANG_ES_100 || eLang == LANG_120)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
//Only enable for ES.
|
||||
//Not present in 120, ignored in other desktop languages.
|
||||
static int HavePrecisionQualifers(const GLLang eLang)
|
||||
{
|
||||
if(eLang >= LANG_ES_100 && eLang <= LANG_ES_310)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveCubemapArray(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_400 && eLang <= LANG_GL_LAST)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool IsESLanguage(const GLLang eLang)
|
||||
{
|
||||
return (eLang >= LANG_ES_FIRST && eLang <= LANG_ES_LAST);
|
||||
}
|
||||
|
||||
static bool IsDesktopGLLanguage(const GLLang eLang)
|
||||
{
|
||||
return (eLang >= LANG_GL_FIRST && eLang <= LANG_GL_LAST);
|
||||
}
|
||||
|
||||
//Only on vertex inputs and pixel outputs.
|
||||
static int HaveLimitedInOutLocationQualifier(const GLLang eLang, const struct GlExtensions *extensions)
|
||||
{
|
||||
if(eLang >= LANG_330 || eLang == LANG_ES_300 || eLang == LANG_ES_310 || (extensions && ((struct GlExtensions*)extensions)->ARB_explicit_attrib_location))
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveInOutLocationQualifier(const GLLang eLang)
|
||||
{
|
||||
if(eLang >= LANG_410 || eLang == LANG_ES_310)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
//layout(binding = X) uniform {uniformA; uniformB;}
|
||||
//layout(location = X) uniform uniform_name;
|
||||
static int HaveUniformBindingsAndLocations(const GLLang eLang,const struct GlExtensions *extensions, unsigned int flags)
|
||||
{
|
||||
if (flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS)
|
||||
return 0;
|
||||
|
||||
if (eLang >= LANG_430 || eLang == LANG_ES_310 ||
|
||||
(extensions && ((struct GlExtensions*)extensions)->ARB_explicit_uniform_location && ((struct GlExtensions*)extensions)->ARB_shading_language_420pack))
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int DualSourceBlendSupported(const GLLang eLang)
|
||||
{
|
||||
if(eLang >= LANG_330)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int SubroutinesSupported(const GLLang eLang)
|
||||
{
|
||||
if(eLang >= LANG_400)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
//Before 430, flat/smooth/centroid/noperspective must match
|
||||
//between fragment and its previous stage.
|
||||
//HLSL bytecode only tells us the interpolation in pixel shader.
|
||||
static int PixelInterpDependency(const GLLang eLang)
|
||||
{
|
||||
if(eLang < LANG_430)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveUVec(const GLLang eLang)
|
||||
{
|
||||
switch(eLang)
|
||||
{
|
||||
case LANG_ES_100:
|
||||
case LANG_120:
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int HaveGather(const GLLang eLang)
|
||||
{
|
||||
if(eLang >= LANG_400 || eLang == LANG_ES_310)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveGatherNonConstOffset(const GLLang eLang)
|
||||
{
|
||||
if(eLang >= LANG_420 || eLang == LANG_ES_310)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int HaveQueryLod(const GLLang eLang)
|
||||
{
|
||||
if(eLang >= LANG_400)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveQueryLevels(const GLLang eLang)
|
||||
{
|
||||
if(eLang >= LANG_430)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveFragmentCoordConventions(const GLLang eLang)
|
||||
{
|
||||
if(eLang >= LANG_150)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveGeometryShaderARB(const GLLang eLang)
|
||||
{
|
||||
if(eLang >= LANG_150)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveAtomicCounter(const GLLang eLang)
|
||||
{
|
||||
if(eLang >= LANG_420 || eLang == LANG_ES_310)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveAtomicMem(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_430 || eLang == LANG_ES_310)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveImageAtomics(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_420)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveCompute(const GLLang eLang)
|
||||
{
|
||||
if(eLang >= LANG_430 || eLang == LANG_ES_310)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveImageLoadStore(const GLLang eLang)
|
||||
{
|
||||
if(eLang >= LANG_420 || eLang == LANG_ES_310)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
27
src/internal_includes/reflect.h
Normal file
27
src/internal_includes/reflect.h
Normal file
@ -0,0 +1,27 @@
|
||||
#ifndef REFLECT_H
|
||||
#define REFLECT_H
|
||||
|
||||
#include "hlslcc.h"
|
||||
|
||||
struct ShaderPhase_TAG;
|
||||
|
||||
// Set of raw pointers handed to LoadShaderInfo() as one unit.
// NOTE(review): presumably each pointer addresses the matching reflection chunk of the
// shader blob and is null when that chunk is absent - confirm against the decoder.
typedef struct
{
    // Signature-style chunks
    uint32_t *pui32Inputs;
    uint32_t *pui32Outputs;

    uint32_t *pui32Resources;
    uint32_t *pui32Interfaces;

    // "11" / stream variants of the input and output chunks
    uint32_t *pui32Inputs11;
    uint32_t *pui32Outputs11;
    uint32_t *pui32OutputsWithStreams;

    // Patch constant chunks
    uint32_t *pui32PatchConstants;
    uint32_t *pui32PatchConstants11;
} ReflectionChunks;
|
||||
|
||||
void LoadShaderInfo(const uint32_t ui32MajorVersion,
|
||||
const uint32_t ui32MinorVersion,
|
||||
const ReflectionChunks* psChunks,
|
||||
ShaderInfo* psInfo, uint32_t decodeFlags);
|
||||
|
||||
#endif
|
||||
|
107
src/internal_includes/toGLSL.h
Normal file
107
src/internal_includes/toGLSL.h
Normal file
@ -0,0 +1,107 @@
|
||||
#pragma once
|
||||
|
||||
#include "hlslcc.h"
|
||||
#include "internal_includes/Translator.h"
|
||||
|
||||
class HLSLCrossCompilerContext;
|
||||
|
||||
class ToGLSL : public Translator
|
||||
{
|
||||
protected:
|
||||
GLLang language;
|
||||
public:
|
||||
explicit ToGLSL(HLSLCrossCompilerContext *ctx) : Translator(ctx), language(LANG_DEFAULT) {}
|
||||
// Sets the target language according to given input. if LANG_DEFAULT, does autodetect and returns the selected language
|
||||
GLLang SetLanguage(GLLang suggestedLanguage);
|
||||
|
||||
virtual bool Translate();
|
||||
virtual void TranslateDeclaration(const Declaration* psDecl);
|
||||
virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL);
|
||||
virtual void SetIOPrefixes();
|
||||
|
||||
private:
|
||||
|
||||
void TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL);
|
||||
void TranslateInstruction(Instruction* psInst, bool isEmbedded = false);
|
||||
|
||||
void TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase);
|
||||
|
||||
void TranslateOperandIndex(const Operand* psOperand, int index);
|
||||
void TranslateOperandIndexMAD(const Operand* psOperand, int index, uint32_t multiply, uint32_t add);
|
||||
|
||||
void AddOpAssignToDestWithMask(const Operand* psDest,
|
||||
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask);
|
||||
void AddAssignToDest(const Operand* psDest,
|
||||
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis);
|
||||
void AddAssignPrologue(int numParenthesis, bool isEmbedded = false);
|
||||
|
||||
void AddBuiltinOutput(const Declaration* psDecl, int arrayElements, const char* builtinName);
|
||||
void HandleOutputRedirect(const Declaration *psDecl, const char *Precision);
|
||||
void HandleInputRedirect(const Declaration *psDecl, const char *Precision);
|
||||
|
||||
void AddUserOutput(const Declaration* psDecl);
|
||||
void DeclareStructConstants(const uint32_t ui32BindingPoint,
|
||||
const ConstantBuffer* psCBuf, const Operand* psOperand,
|
||||
bstring glsl);
|
||||
|
||||
typedef enum
|
||||
{
|
||||
CMP_EQ,
|
||||
CMP_LT,
|
||||
CMP_GE,
|
||||
CMP_NE,
|
||||
} ComparisonType;
|
||||
|
||||
void AddComparison(Instruction* psInst, ComparisonType eType,
|
||||
uint32_t typeFlag);
|
||||
|
||||
void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, bool isEmbedded = false);
|
||||
void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2);
|
||||
void CallBinaryOp(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType, bool isEmbedded = false);
|
||||
void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst,
|
||||
int dest, int src0, int src1, int src2, uint32_t dataType);
|
||||
void CallHelper3(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask);
|
||||
void CallHelper2(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, int paramsShouldFollowWriteMask);
|
||||
void CallHelper2Int(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, int paramsShouldFollowWriteMask);
|
||||
void CallHelper2UInt(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, int paramsShouldFollowWriteMask);
|
||||
void CallHelper1(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int paramsShouldFollowWriteMask);
|
||||
void CallHelper1Int(
|
||||
const char* name,
|
||||
Instruction* psInst,
|
||||
const int dest,
|
||||
const int src0,
|
||||
int paramsShouldFollowWriteMask);
|
||||
void TranslateTexelFetch(
|
||||
Instruction* psInst,
|
||||
const ResourceBinding* psBinding,
|
||||
bstring glsl);
|
||||
void TranslateTexelFetchOffset(
|
||||
Instruction* psInst,
|
||||
const ResourceBinding* psBinding,
|
||||
bstring glsl);
|
||||
void TranslateTexCoord(
|
||||
const RESOURCE_DIMENSION eResDim,
|
||||
Operand* psTexCoordOperand);
|
||||
void GetResInfoData(Instruction* psInst, int index, int destElem);
|
||||
void TranslateTextureSample(Instruction* psInst,
|
||||
uint32_t ui32Flags);
|
||||
void TranslateDynamicComponentSelection(const ShaderVarType* psVarType,
|
||||
const Operand* psByteAddr, uint32_t offset, uint32_t mask);
|
||||
void TranslateShaderStorageStore(Instruction* psInst);
|
||||
void TranslateShaderStorageLoad(Instruction* psInst);
|
||||
void TranslateAtomicMemOp(Instruction* psInst);
|
||||
void TranslateConditional(
|
||||
Instruction* psInst,
|
||||
bstring glsl);
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
23
src/internal_includes/toGLSLOperand.h
Normal file
23
src/internal_includes/toGLSLOperand.h
Normal file
@ -0,0 +1,23 @@
|
||||
#ifndef TO_GLSL_OPERAND_H
|
||||
#define TO_GLSL_OPERAND_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "bstrlib.h"
|
||||
#include "ShaderInfo.h"
|
||||
|
||||
class HLSLCrossCompilerContext;
|
||||
|
||||
//void TranslateOperand(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32TOFlag);
|
||||
// Translate operand but add additional component mask
|
||||
//void TranslateOperandWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask);
|
||||
|
||||
void TranslateOperandSwizzle(HLSLCrossCompilerContext* psContext, const Operand* psOperand, int iRebase);
|
||||
void TranslateOperandSwizzleWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase);
|
||||
|
||||
void ResourceName(bstring targetStr, HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare);
|
||||
std::string ResourceName(HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare);
|
||||
|
||||
std::string TextureSamplerName(ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare);
|
||||
void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare);
|
||||
|
||||
#endif
|
193
src/internal_includes/toMetal.h
Normal file
193
src/internal_includes/toMetal.h
Normal file
@ -0,0 +1,193 @@
|
||||
|
||||
#pragma once
|
||||
#include "internal_includes/Translator.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
// We store struct definition contents inside a vector of strings.
// A fresh definition has no members, no dependencies and is not yet printed.
struct StructDefinition
{
    StructDefinition()
        : m_Members()
        , m_Dependencies()
        , m_IsPrinted(false)
    {
    }

    // A vector of strings with the struct members
    std::vector<std::string> m_Members;

    // A vector of struct names this struct depends on.
    std::vector<std::string> m_Dependencies;

    // Has this struct been printed out yet?
    bool m_IsPrinted;
};
|
||||
|
||||
typedef std::map<std::string, StructDefinition> StructDefinitions;
|
||||
|
||||
// Map of extra function definitions we need to add before the shader body but after the declarations.
|
||||
typedef std::map<std::string, std::string> FunctionDefinitions;
|
||||
|
||||
// A helper class for allocating binding slots
// (because both UAVs and textures use the same slots in Metal, also constant buffers and other buffers etc)
//
// Slots are handed out sequentially starting from 0, in the order in which
// distinct (bind type, register number) pairs are first requested. Asking
// again for a pair that was already seen returns the slot assigned earlier.
class BindingSlotAllocator
{
    // Keyed by (bind type, register number). A real pair is used instead of
    // packing the type into the upper 16 bits of the register number, so that
    // register numbers >= 0x10000 cannot alias an entry of another bind type.
    typedef std::map<std::pair<uint32_t, uint32_t>, uint32_t> SlotMap;
    SlotMap m_Allocations;

public:
    BindingSlotAllocator() : m_Allocations(), m_NextFreeSlot(0) {}

    enum BindType
    {
        ConstantBuffer = 0,
        RWBuffer,
        Texture,
        UAV
    };

    // Returns the binding slot for register regNo of the given bind type,
    // allocating the next free slot the first time the pair is seen.
    uint32_t GetBindingSlot(uint32_t regNo, BindType type)
    {
        const SlotMap::key_type key(uint32_t(type), regNo);

        // insert() leaves an existing entry untouched, so a single lookup
        // covers both the "already allocated" and the "new" case.
        const std::pair<SlotMap::iterator, bool> result =
            m_Allocations.insert(SlotMap::value_type(key, m_NextFreeSlot));
        if (result.second)
            ++m_NextFreeSlot;

        return result.first->second;
    }

private:
    uint32_t m_NextFreeSlot; // Slot that the next new allocation will receive
};
|
||||
|
||||
|
||||
class ToMetal : public Translator
|
||||
{
|
||||
protected:
|
||||
GLLang language;
|
||||
public:
|
||||
explicit ToMetal(HLSLCrossCompilerContext *ctx) : Translator(ctx), m_ShadowSamplerDeclared(false) {}
|
||||
|
||||
virtual bool Translate();
|
||||
virtual void TranslateDeclaration(const Declaration *psDecl);
|
||||
virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL);
|
||||
std::string TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL);
|
||||
|
||||
virtual void SetIOPrefixes();
|
||||
|
||||
private:
|
||||
void TranslateInstruction(Instruction* psInst);
|
||||
|
||||
void DeclareBuiltinInput(const Declaration *psDecl);
|
||||
void DeclareBuiltinOutput(const Declaration *psDecl);
|
||||
|
||||
// Retrieve the name of the output struct for this shader
|
||||
std::string GetOutputStructName() const;
|
||||
std::string GetInputStructName() const;
|
||||
|
||||
void HandleInputRedirect(const Declaration *psDecl, const std::string &typeName);
|
||||
void HandleOutputRedirect(const Declaration *psDecl, const std::string &typeName);
|
||||
|
||||
void DeclareConstantBuffer(const ConstantBuffer *psCBuf, uint32_t ui32BindingPoint);
|
||||
void DeclareStructType(const std::string &name, const std::vector<ShaderVar> &contents, bool withinCB = false, uint32_t cumulativeOffset = 0, bool stripUnused = false);
|
||||
void DeclareStructType(const std::string &name, const std::vector<ShaderVarType> &contents, bool withinCB = false, uint32_t cumulativeOffset = 0);
|
||||
void DeclareStructVariable(const std::string &parentName, const ShaderVar &var, bool withinCB = false, uint32_t cumulativeOffset = 0);
|
||||
void DeclareStructVariable(const std::string &parentName, const ShaderVarType &var, bool withinCB = false, uint32_t cumulativeOffset = 0);
|
||||
void DeclareBufferVariable(const Declaration *psDecl, const bool isRaw, const bool isUAV);
|
||||
|
||||
void DeclareResource(const Declaration *psDecl);
|
||||
void TranslateResourceTexture(const Declaration* psDecl, uint32_t samplerCanDoShadowCmp, HLSLCC_TEX_DIMENSION texDim);
|
||||
|
||||
void DeclareOutput(const Declaration *decl);
|
||||
|
||||
void PrintStructDeclarations(StructDefinitions &defs);
|
||||
|
||||
std::string ResourceName(ResourceGroup group, const uint32_t ui32RegisterNumber);
|
||||
|
||||
// ToMetalOperand.cpp
|
||||
std::string TranslateOperandSwizzle(const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase, bool includeDot = true);
|
||||
std::string TranslateOperandIndex(const Operand* psOperand, int index);
|
||||
std::string TranslateVariableName(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase);
|
||||
|
||||
// ToMetalInstruction.cpp
|
||||
|
||||
void AddOpAssignToDestWithMask(const Operand* psDest,
|
||||
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask);
|
||||
void AddAssignToDest(const Operand* psDest,
|
||||
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis);
|
||||
void AddAssignPrologue(int numParenthesis);
|
||||
|
||||
typedef enum
|
||||
{
|
||||
CMP_EQ,
|
||||
CMP_LT,
|
||||
CMP_GE,
|
||||
CMP_NE,
|
||||
} ComparisonType;
|
||||
|
||||
void AddComparison(Instruction* psInst, ComparisonType eType,
|
||||
uint32_t typeFlag);
|
||||
|
||||
void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc);
|
||||
void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2);
|
||||
void CallBinaryOp(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType);
|
||||
void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst,
|
||||
int dest, int src0, int src1, int src2, uint32_t dataType);
|
||||
void CallHelper3(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask);
|
||||
void CallHelper2(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, int paramsShouldFollowWriteMask);
|
||||
void CallHelper2Int(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, int paramsShouldFollowWriteMask);
|
||||
void CallHelper2UInt(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, int paramsShouldFollowWriteMask);
|
||||
void CallHelper1(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int paramsShouldFollowWriteMask);
|
||||
void CallHelper1Int(
|
||||
const char* name,
|
||||
Instruction* psInst,
|
||||
const int dest,
|
||||
const int src0,
|
||||
int paramsShouldFollowWriteMask);
|
||||
void TranslateTexelFetch(
|
||||
Instruction* psInst,
|
||||
const ResourceBinding* psBinding,
|
||||
bstring glsl);
|
||||
void TranslateTexelFetchOffset(
|
||||
Instruction* psInst,
|
||||
const ResourceBinding* psBinding,
|
||||
bstring glsl);
|
||||
void TranslateTexCoord(
|
||||
const RESOURCE_DIMENSION eResDim,
|
||||
Operand* psTexCoordOperand);
|
||||
void GetResInfoData(Instruction* psInst, int index, int destElem);
|
||||
void TranslateTextureSample(Instruction* psInst,
|
||||
uint32_t ui32Flags);
|
||||
void TranslateDynamicComponentSelection(const ShaderVarType* psVarType,
|
||||
const Operand* psByteAddr, uint32_t offset, uint32_t mask);
|
||||
void TranslateShaderStorageStore(Instruction* psInst);
|
||||
void TranslateShaderStorageLoad(Instruction* psInst);
|
||||
void TranslateAtomicMemOp(Instruction* psInst);
|
||||
void TranslateConditional(
|
||||
Instruction* psInst,
|
||||
bstring glsl);
|
||||
|
||||
// The map is keyed by struct name. The special name "" (empty string) is reserved for entry point function parameters
|
||||
StructDefinitions m_StructDefinitions;
|
||||
|
||||
// A <function name, body text> map of extra helper functions we'll need.
|
||||
FunctionDefinitions m_FunctionDefinitions;
|
||||
|
||||
BindingSlotAllocator m_TextureSlots;
|
||||
BindingSlotAllocator m_BufferSlots;
|
||||
|
||||
std::string m_ExtraGlobalDefinitions;
|
||||
|
||||
bool m_ShadowSamplerDeclared;
|
||||
|
||||
void EnsureShadowSamplerDeclared();
|
||||
|
||||
// Add an extra function to the m_FunctionDefinitions list, unless it's already there.
|
||||
void DeclareExtraFunction(const std::string &name, const std::string &body);
|
||||
|
||||
// Move all lowp -> mediump
|
||||
void ClampPartialPrecisions();
|
||||
};
|
||||
|
||||
|
3
src/internal_includes/toMetalDeclaration.h
Normal file
3
src/internal_includes/toMetalDeclaration.h
Normal file
@ -0,0 +1,3 @@
|
||||
#pragma once
|
||||
|
||||
#include "internal_includes/Declaration.h"
|
783
src/internal_includes/tokens.h
Normal file
783
src/internal_includes/tokens.h
Normal file
@ -0,0 +1,783 @@
|
||||
#ifndef TOKENS_H
|
||||
#define TOKENS_H
|
||||
|
||||
#include "hlslcc.h"
|
||||
|
||||
// Shader phase identifiers. SHADER_PHASE_INVALID is -1; the remaining values
// count up from 0 in declaration order.
enum SHADER_PHASE_TYPE
{
    SHADER_PHASE_INVALID = -1,
    MAIN_PHASE,           // 0
    HS_GLOBAL_DECL_PHASE, // 1
    HS_CTRL_POINT_PHASE,  // 2
    HS_FORK_PHASE,        // 3
    HS_JOIN_PHASE         // 4
};
|
||||
|
||||
// Returns the upper 16 bits (31..16) of the token as a SHADER_TYPE.
static SHADER_TYPE DecodeShaderType(uint32_t ui32Token)
{
    const uint32_t typeBits = ui32Token >> 16;
    return static_cast<SHADER_TYPE>(typeBits);
}
|
||||
|
||||
// Returns bits 7..4 of the token (the program major version).
static uint32_t DecodeProgramMajorVersion(uint32_t ui32Token)
{
    const uint32_t shifted = ui32Token >> 4;
    return shifted & 0xfu;
}
|
||||
|
||||
// Returns bits 3..0 of the token (the program minor version).
static uint32_t DecodeProgramMinorVersion(uint32_t ui32Token)
{
    const uint32_t minorMask = 0xfu;
    return ui32Token & minorMask;
}
|
||||
|
||||
// Returns bits 30..24 of the token (the instruction length field).
static uint32_t DecodeInstructionLength(uint32_t ui32Token)
{
    const uint32_t shifted = ui32Token >> 24;
    return shifted & 0x7fu;
}
|
||||
|
||||
// Returns bit 31 of the opcode token: 1 when the "extended" bit is set, else 0.
static uint32_t DecodeIsOpcodeExtended(uint32_t ui32Token)
{
    // Shifting a 32-bit unsigned value right by 31 leaves only the top bit.
    return ui32Token >> 31;
}
|
||||
|
||||
// Kinds of extended opcode token.
enum EXTENDED_OPCODE_TYPE
{
    EXTENDED_OPCODE_EMPTY = 0,
    EXTENDED_OPCODE_SAMPLE_CONTROLS = 1,
    EXTENDED_OPCODE_RESOURCE_DIM = 2,
    EXTENDED_OPCODE_RESOURCE_RETURN_TYPE = 3,
};

// Returns bits 5..0 of the token as an EXTENDED_OPCODE_TYPE.
static EXTENDED_OPCODE_TYPE DecodeExtendedOpcodeType(uint32_t ui32Token)
{
    const uint32_t typeBits = ui32Token & 0x3fu;
    return static_cast<EXTENDED_OPCODE_TYPE>(typeBits);
}
|
||||
|
||||
|
||||
// Returns the 4-bit return-type field for coordinate ui32Coord. The fields are
// packed back to back starting at bit 0, four bits per coordinate.
static RESOURCE_RETURN_TYPE DecodeResourceReturnType(uint32_t ui32Coord, uint32_t ui32Token)
{
    const uint32_t shift = ui32Coord * 4;
    return static_cast<RESOURCE_RETURN_TYPE>((ui32Token >> shift) & 0xF);
}
|
||||
|
||||
// Returns the 4-bit return-type field for coordinate ui32Coord from an
// extended token, where the fields start at bit 6, four bits per coordinate.
static RESOURCE_RETURN_TYPE DecodeExtendedResourceReturnType(uint32_t ui32Coord, uint32_t ui32Token)
{
    const uint32_t shift = ui32Coord * 4 + 6;
    return static_cast<RESOURCE_RETURN_TYPE>((ui32Token >> shift) & 0xF);
}
|
||||
|
||||
// Instruction opcodes. The negative values are the DX9 entries; everything
// from OPCODE_ADD on counts up from 0 in declaration order. The explicit
// values written at section boundaries below only restate that implicit
// numbering, so do not insert or remove entries without updating them.
enum OPCODE_TYPE
{
    // For DX9
    OPCODE_POW = -6,
    OPCODE_DP2ADD = -5,
    OPCODE_LRP = -4,
    OPCODE_ENDREP = -3,
    OPCODE_REP = -2,
    OPCODE_SPECIAL_DCL_IMMCONST = -1,

    OPCODE_ADD = 0,
    OPCODE_AND,
    OPCODE_BREAK,
    OPCODE_BREAKC,
    OPCODE_CALL,
    OPCODE_CALLC,
    OPCODE_CASE,
    OPCODE_CONTINUE,
    OPCODE_CONTINUEC,
    OPCODE_CUT,
    OPCODE_DEFAULT,
    OPCODE_DERIV_RTX,
    OPCODE_DERIV_RTY,
    OPCODE_DISCARD,
    OPCODE_DIV,
    OPCODE_DP2,
    OPCODE_DP3,
    OPCODE_DP4,
    OPCODE_ELSE,
    OPCODE_EMIT,
    OPCODE_EMITTHENCUT,
    OPCODE_ENDIF,
    OPCODE_ENDLOOP,
    OPCODE_ENDSWITCH,
    OPCODE_EQ,
    OPCODE_EXP,
    OPCODE_FRC,
    OPCODE_FTOI,
    OPCODE_FTOU,
    OPCODE_GE,
    OPCODE_IADD,
    OPCODE_IF,
    OPCODE_IEQ,
    OPCODE_IGE,
    OPCODE_ILT,
    OPCODE_IMAD,
    OPCODE_IMAX,
    OPCODE_IMIN,
    OPCODE_IMUL,
    OPCODE_INE,
    OPCODE_INEG,
    OPCODE_ISHL,
    OPCODE_ISHR,
    OPCODE_ITOF,
    OPCODE_LABEL,
    OPCODE_LD,
    OPCODE_LD_MS,
    OPCODE_LOG,
    OPCODE_LOOP,
    OPCODE_LT,
    OPCODE_MAD,
    OPCODE_MIN,
    OPCODE_MAX,
    OPCODE_CUSTOMDATA,
    OPCODE_MOV,
    OPCODE_MOVC,
    OPCODE_MUL,
    OPCODE_NE,
    OPCODE_NOP,
    OPCODE_NOT,
    OPCODE_OR,
    OPCODE_RESINFO,
    OPCODE_RET,
    OPCODE_RETC,
    OPCODE_ROUND_NE,
    OPCODE_ROUND_NI,
    OPCODE_ROUND_PI,
    OPCODE_ROUND_Z,
    OPCODE_RSQ,
    OPCODE_SAMPLE,
    OPCODE_SAMPLE_C,
    OPCODE_SAMPLE_C_LZ,
    OPCODE_SAMPLE_L,
    OPCODE_SAMPLE_D,
    OPCODE_SAMPLE_B,
    OPCODE_SQRT,
    OPCODE_SWITCH,
    OPCODE_SINCOS,
    OPCODE_UDIV,
    OPCODE_ULT,
    OPCODE_UGE,
    OPCODE_UMUL,
    OPCODE_UMAD,
    OPCODE_UMAX,
    OPCODE_UMIN,
    OPCODE_USHR,
    OPCODE_UTOF,
    OPCODE_XOR,

    // DCL* opcodes have custom operand formats.
    OPCODE_DCL_RESOURCE = 88,
    OPCODE_DCL_CONSTANT_BUFFER,
    OPCODE_DCL_SAMPLER,
    OPCODE_DCL_INDEX_RANGE,
    OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY,
    OPCODE_DCL_GS_INPUT_PRIMITIVE,
    OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT,
    OPCODE_DCL_INPUT,
    OPCODE_DCL_INPUT_SGV,
    OPCODE_DCL_INPUT_SIV,
    OPCODE_DCL_INPUT_PS,
    OPCODE_DCL_INPUT_PS_SGV,
    OPCODE_DCL_INPUT_PS_SIV,
    OPCODE_DCL_OUTPUT,
    OPCODE_DCL_OUTPUT_SGV,
    OPCODE_DCL_OUTPUT_SIV,
    OPCODE_DCL_TEMPS,
    OPCODE_DCL_INDEXABLE_TEMP,
    OPCODE_DCL_GLOBAL_FLAGS,

    // -----------------------------------------------

    OPCODE_RESERVED_10 = 107,

    // ---------- DX 10.1 op codes---------------------

    OPCODE_LOD,
    OPCODE_GATHER4,
    OPCODE_SAMPLE_POS,
    OPCODE_SAMPLE_INFO,

    // -----------------------------------------------

    // This should be 10.1's version of NUM_OPCODES
    OPCODE_RESERVED_10_1 = 112,

    // ---------- DX 11 op codes---------------------
    OPCODE_HS_DECLS,               // token marks beginning of HS sub-shader
    OPCODE_HS_CONTROL_POINT_PHASE, // token marks beginning of HS sub-shader
    OPCODE_HS_FORK_PHASE,          // token marks beginning of HS sub-shader
    OPCODE_HS_JOIN_PHASE,          // token marks beginning of HS sub-shader

    OPCODE_EMIT_STREAM,
    OPCODE_CUT_STREAM,
    OPCODE_EMITTHENCUT_STREAM,
    OPCODE_INTERFACE_CALL,

    OPCODE_BUFINFO,
    OPCODE_DERIV_RTX_COARSE,
    OPCODE_DERIV_RTX_FINE,
    OPCODE_DERIV_RTY_COARSE,
    OPCODE_DERIV_RTY_FINE,
    OPCODE_GATHER4_C,
    OPCODE_GATHER4_PO,
    OPCODE_GATHER4_PO_C,
    OPCODE_RCP,
    OPCODE_F32TOF16,
    OPCODE_F16TOF32,
    OPCODE_UADDC,
    OPCODE_USUBB,
    OPCODE_COUNTBITS,
    OPCODE_FIRSTBIT_HI,
    OPCODE_FIRSTBIT_LO,
    OPCODE_FIRSTBIT_SHI,
    OPCODE_UBFE,
    OPCODE_IBFE,
    OPCODE_BFI,
    OPCODE_BFREV,
    OPCODE_SWAPC,

    OPCODE_DCL_STREAM,
    OPCODE_DCL_FUNCTION_BODY,
    OPCODE_DCL_FUNCTION_TABLE,
    OPCODE_DCL_INTERFACE,

    OPCODE_DCL_INPUT_CONTROL_POINT_COUNT,
    OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT,
    OPCODE_DCL_TESS_DOMAIN,
    OPCODE_DCL_TESS_PARTITIONING,
    OPCODE_DCL_TESS_OUTPUT_PRIMITIVE,
    OPCODE_DCL_HS_MAX_TESSFACTOR,
    OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT,
    OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT,

    OPCODE_DCL_THREAD_GROUP,
    OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED,
    OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW,
    OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED,
    OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW,
    OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED,
    OPCODE_DCL_RESOURCE_RAW,
    OPCODE_DCL_RESOURCE_STRUCTURED,
    OPCODE_LD_UAV_TYPED,
    OPCODE_STORE_UAV_TYPED,
    OPCODE_LD_RAW,
    OPCODE_STORE_RAW,
    OPCODE_LD_STRUCTURED,
    OPCODE_STORE_STRUCTURED,
    OPCODE_ATOMIC_AND,
    OPCODE_ATOMIC_OR,
    OPCODE_ATOMIC_XOR,
    OPCODE_ATOMIC_CMP_STORE,
    OPCODE_ATOMIC_IADD,
    OPCODE_ATOMIC_IMAX,
    OPCODE_ATOMIC_IMIN,
    OPCODE_ATOMIC_UMAX,
    OPCODE_ATOMIC_UMIN,
    OPCODE_IMM_ATOMIC_ALLOC,
    OPCODE_IMM_ATOMIC_CONSUME,
    OPCODE_IMM_ATOMIC_IADD,
    OPCODE_IMM_ATOMIC_AND,
    OPCODE_IMM_ATOMIC_OR,
    OPCODE_IMM_ATOMIC_XOR,
    OPCODE_IMM_ATOMIC_EXCH,
    OPCODE_IMM_ATOMIC_CMP_EXCH,
    OPCODE_IMM_ATOMIC_IMAX,
    OPCODE_IMM_ATOMIC_IMIN,
    OPCODE_IMM_ATOMIC_UMAX,
    OPCODE_IMM_ATOMIC_UMIN,
    OPCODE_SYNC,

    OPCODE_DADD,
    OPCODE_DMAX,
    OPCODE_DMIN,
    OPCODE_DMUL,
    OPCODE_DEQ,
    OPCODE_DGE,
    OPCODE_DLT,
    OPCODE_DNE,
    OPCODE_DMOV,
    OPCODE_DMOVC,
    OPCODE_DTOF,
    OPCODE_FTOD,

    OPCODE_EVAL_SNAPPED,
    OPCODE_EVAL_SAMPLE_INDEX,
    OPCODE_EVAL_CENTROID,

    OPCODE_DCL_GS_INSTANCE_COUNT,

    OPCODE_ABORT,
    OPCODE_DEBUG_BREAK,

    // -----------------------------------------------

    // This marks the end of D3D11.0 opcodes
    OPCODE_RESERVED_11 = 209,

    OPCODE_DDIV,
    OPCODE_DFMA,
    OPCODE_DRCP,

    OPCODE_MSAD,

    OPCODE_DTOI,
    OPCODE_DTOU,
    OPCODE_ITOD,
    OPCODE_UTOD,

    // -----------------------------------------------

    // This marks the end of D3D11.1 opcodes
    OPCODE_RESERVED_11_1 = 218,

    NUM_OPCODES,
    OPCODE_INVALID = NUM_OPCODES,
};

// Returns bits 10..0 of the opcode token as an OPCODE_TYPE.
static OPCODE_TYPE DecodeOpcodeType(uint32_t ui32Token)
{
    const uint32_t opcodeBits = ui32Token & 0x7ffu;
    return static_cast<OPCODE_TYPE>(opcodeBits);
}
|
||||
|
||||
// Number of index dimensions an operand carries (0 to 3).
enum OPERAND_INDEX_DIMENSION
{
    INDEX_0D,
    INDEX_1D,
    INDEX_2D,
    INDEX_3D,
};

// Returns bits 21..20 of the operand token as an OPERAND_INDEX_DIMENSION.
static OPERAND_INDEX_DIMENSION DecodeOperandIndexDimension(uint32_t ui32Token)
{
    const uint32_t dimBits = (ui32Token >> 20) & 0x3u;
    return static_cast<OPERAND_INDEX_DIMENSION>(dimBits);
}
|
||||
|
||||
// Operand register files / kinds. The negative OPERAND_TYPE_SPECIAL_* values
// sit below the 0-based entries.
enum OPERAND_TYPE
{
    OPERAND_TYPE_SPECIAL_LOOPCOUNTER = -10,
    OPERAND_TYPE_SPECIAL_IMMCONSTINT = -9,
    OPERAND_TYPE_SPECIAL_TEXCOORD = -8,
    OPERAND_TYPE_SPECIAL_POSITION = -7,
    OPERAND_TYPE_SPECIAL_FOG = -6,
    OPERAND_TYPE_SPECIAL_POINTSIZE = -5,
    OPERAND_TYPE_SPECIAL_OUTOFFSETCOLOUR = -4,
    OPERAND_TYPE_SPECIAL_OUTBASECOLOUR = -3,
    OPERAND_TYPE_SPECIAL_ADDRESS = -2,
    OPERAND_TYPE_SPECIAL_IMMCONST = -1,

    OPERAND_TYPE_TEMP = 0,           // Temporary Register File
    OPERAND_TYPE_INPUT = 1,          // General Input Register File
    OPERAND_TYPE_OUTPUT = 2,         // General Output Register File
    OPERAND_TYPE_INDEXABLE_TEMP = 3, // Temporary Register File (indexable)

    // 32bit/component immediate value(s).
    // If for example, operand token bits [01:00]==OPERAND_4_COMPONENT,
    // this means that the operand type OPERAND_TYPE_IMMEDIATE32
    // results in 4 additional 32bit DWORDS present for the operand.
    OPERAND_TYPE_IMMEDIATE32 = 4,

    OPERAND_TYPE_IMMEDIATE64 = 5,               // 64bit/comp.imm.val(s)HI:LO
    OPERAND_TYPE_SAMPLER = 6,                   // Reference to sampler state
    OPERAND_TYPE_RESOURCE = 7,                  // Reference to memory resource (e.g. texture)
    OPERAND_TYPE_CONSTANT_BUFFER = 8,           // Reference to constant buffer
    OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER = 9, // Reference to immediate constant buffer
    OPERAND_TYPE_LABEL = 10,                    // Label
    OPERAND_TYPE_INPUT_PRIMITIVEID = 11,        // Input primitive ID
    OPERAND_TYPE_OUTPUT_DEPTH = 12,             // Output Depth
    OPERAND_TYPE_NULL = 13,                     // Null register, used to discard results of operations

    // Below are operands new in DX 10.1
    OPERAND_TYPE_RASTERIZER = 14,           // DX10.1 Rasterizer register, used to denote the depth/stencil and render target resources
    OPERAND_TYPE_OUTPUT_COVERAGE_MASK = 15, // DX10.1 PS output MSAA coverage mask (scalar)

    // Below are operands new in DX 11
    OPERAND_TYPE_STREAM = 16,                             // Reference to GS stream output resource
    OPERAND_TYPE_FUNCTION_BODY = 17,                      // Reference to a function definition
    OPERAND_TYPE_FUNCTION_TABLE = 18,                     // Reference to a set of functions used by a class
    OPERAND_TYPE_INTERFACE = 19,                          // Reference to an interface
    OPERAND_TYPE_FUNCTION_INPUT = 20,                     // Reference to an input parameter to a function
    OPERAND_TYPE_FUNCTION_OUTPUT = 21,                    // Reference to an output parameter to a function
    OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID = 22,            // HS Control Point phase input saying which output control point ID this is
    OPERAND_TYPE_INPUT_FORK_INSTANCE_ID = 23,             // HS Fork Phase input instance ID
    OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID = 24,             // HS Join Phase input instance ID
    OPERAND_TYPE_INPUT_CONTROL_POINT = 25,                // HS Fork+Join, DS phase input control points (array of them)
    OPERAND_TYPE_OUTPUT_CONTROL_POINT = 26,               // HS Fork+Join phase output control points (array of them)
    OPERAND_TYPE_INPUT_PATCH_CONSTANT = 27,               // DS+HSJoin Input Patch Constants (array of them)
    OPERAND_TYPE_INPUT_DOMAIN_POINT = 28,                 // DS Input Domain point
    OPERAND_TYPE_THIS_POINTER = 29,                       // Reference to an interface this pointer
    OPERAND_TYPE_UNORDERED_ACCESS_VIEW = 30,              // Reference to UAV u#
    OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY = 31,         // Reference to Thread Group Shared Memory g#
    OPERAND_TYPE_INPUT_THREAD_ID = 32,                    // Compute Shader Thread ID
    OPERAND_TYPE_INPUT_THREAD_GROUP_ID = 33,              // Compute Shader Thread Group ID
    OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP = 34,           // Compute Shader Thread ID In Thread Group
    OPERAND_TYPE_INPUT_COVERAGE_MASK = 35,                // Pixel shader coverage mask input
    OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED = 36, // Compute Shader Thread ID In Group Flattened to a 1D value.
    OPERAND_TYPE_INPUT_GS_INSTANCE_ID = 37,               // Input GS instance ID
    OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL = 38,         // Output Depth, forced to be greater than or equal than current depth
    OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL = 39,            // Output Depth, forced to be less than or equal to current depth
    OPERAND_TYPE_CYCLE_COUNTER = 40,                      // Cycle counter
};

// Returns bits 19..12 of the operand token as an OPERAND_TYPE.
static OPERAND_TYPE DecodeOperandType(uint32_t ui32Token)
{
    const uint32_t typeBits = (ui32Token >> 12) & 0xffu;
    return static_cast<OPERAND_TYPE>(typeBits);
}
|
||||
|
||||
// Returns the low 16 bits (15..0) of the token as a SPECIAL_NAME.
static SPECIAL_NAME DecodeOperandSpecialName(uint32_t ui32Token)
{
    const uint32_t nameBits = ui32Token & 0xffffu;
    return static_cast<SPECIAL_NAME>(nameBits);
}
|
||||
|
||||
// How a single operand index is encoded in the token stream.
typedef enum OPERAND_INDEX_REPRESENTATION
{
    OPERAND_INDEX_IMMEDIATE32 = 0,               // Extra DWORD
    OPERAND_INDEX_IMMEDIATE64 = 1,               // 2 Extra DWORDs (HI32:LO32)
    OPERAND_INDEX_RELATIVE = 2,                  // Extra operand
    OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE = 3, // Extra DWORD followed by extra operand
    OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE = 4, // 2 Extra DWORDS (HI32:LO32) followed by extra operand
} OPERAND_INDEX_REPRESENTATION;

// Returns the index representation stored in the operand token for index
// dimension ui32Dimension (only the low two bits of the dimension are used).
//
// The field for dimension d starts at bit 22 + 3*d and is read with a two-bit
// mask, exactly as before, so the result is always in the range 0..3.
//
// The token is shifted down before masking. The previous form built the mask
// with `0x3 << shift` on a signed int, which overflows (undefined behaviour
// before C++20) once the shift reaches 31, i.e. for dimension 3.
static OPERAND_INDEX_REPRESENTATION DecodeOperandIndexRepresentation(uint32_t ui32Dimension, uint32_t ui32Token)
{
    const uint32_t shift = 22u + 3u * (ui32Dimension & 3u);
    return static_cast<OPERAND_INDEX_REPRESENTATION>((ui32Token >> shift) & 0x3u);
}
|
||||
|
||||
// Component-count encodings of an operand token.
enum OPERAND_NUM_COMPONENTS
{
    OPERAND_0_COMPONENT = 0,
    OPERAND_1_COMPONENT = 1,
    OPERAND_4_COMPONENT = 2,
    OPERAND_N_COMPONENT = 3 // unused for now
};

// Returns bits 1..0 of the operand token as an OPERAND_NUM_COMPONENTS.
static OPERAND_NUM_COMPONENTS DecodeOperandNumComponents(uint32_t ui32Token)
{
    const uint32_t countBits = ui32Token & 0x3u;
    return static_cast<OPERAND_NUM_COMPONENTS>(countBits);
}
|
||||
|
||||
// Component selection modes of a 4-component operand.
enum OPERAND_4_COMPONENT_SELECTION_MODE
{
    OPERAND_4_COMPONENT_MASK_MODE = 0,     // mask 4 components
    OPERAND_4_COMPONENT_SWIZZLE_MODE = 1,  // swizzle 4 components
    OPERAND_4_COMPONENT_SELECT_1_MODE = 2, // select 1 of 4 components
};

// Returns bits 3..2 of the operand token as an OPERAND_4_COMPONENT_SELECTION_MODE.
static OPERAND_4_COMPONENT_SELECTION_MODE DecodeOperand4CompSelMode(uint32_t ui32Token)
{
    const uint32_t modeBits = (ui32Token >> 2) & 0x3u;
    return static_cast<OPERAND_4_COMPONENT_SELECTION_MODE>(modeBits);
}
|
||||
|
||||
// One bit per component of a 4-component write mask: X/R is bit 0, Y/G bit 1,
// Z/B bit 2 and W/A bit 3. The R/G/B/A names are aliases of X/Y/Z/W.
#define OPERAND_4_COMPONENT_MASK_X (1 << 0)
#define OPERAND_4_COMPONENT_MASK_Y (1 << 1)
#define OPERAND_4_COMPONENT_MASK_Z (1 << 2)
#define OPERAND_4_COMPONENT_MASK_W (1 << 3)
#define OPERAND_4_COMPONENT_MASK_R OPERAND_4_COMPONENT_MASK_X
#define OPERAND_4_COMPONENT_MASK_G OPERAND_4_COMPONENT_MASK_Y
#define OPERAND_4_COMPONENT_MASK_B OPERAND_4_COMPONENT_MASK_Z
#define OPERAND_4_COMPONENT_MASK_A OPERAND_4_COMPONENT_MASK_W
// All four component bits set (0xf).
#define OPERAND_4_COMPONENT_MASK_ALL \
    (OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z | OPERAND_4_COMPONENT_MASK_W)
|
||||
|
||||
// Returns bits 7..4 of the operand token (the 4-bit component mask).
static uint32_t DecodeOperand4CompMask(uint32_t ui32Token)
{
    const uint32_t shifted = ui32Token >> 4;
    return shifted & 0xfu;
}
|
||||
|
||||
// Returns bits 11..4 of the operand token (the 8-bit swizzle field).
static uint32_t DecodeOperand4CompSwizzle(uint32_t ui32Token)
{
    const uint32_t shifted = ui32Token >> 4;
    return shifted & 0xffu;
}
|
||||
|
||||
// Returns bits 5..4 of the operand token (the selected component index).
static uint32_t DecodeOperand4CompSel1(uint32_t ui32Token)
{
    const uint32_t shifted = ui32Token >> 4;
    return shifted & 0x3u;
}
|
||||
|
||||
// Component indices as they appear in a swizzle (two bits per destination component).
#define OPERAND_4_COMPONENT_X 0
#define OPERAND_4_COMPONENT_Y 1
#define OPERAND_4_COMPONENT_Z 2
#define OPERAND_4_COMPONENT_W 3

// Identity swizzle (.xyzw): destination component i reads source component i.
static const uint32_t NO_SWIZZLE =
    (OPERAND_4_COMPONENT_X << 0) |
    (OPERAND_4_COMPONENT_Y << 2) |
    (OPERAND_4_COMPONENT_Z << 4) |
    (OPERAND_4_COMPONENT_W << 6);

// Broadcast swizzles: the same component index in all four 2-bit lanes.
// Multiplying by 0x55 (binary 01010101) replicates a 2-bit value into each lane.
static const uint32_t XXXX_SWIZZLE = OPERAND_4_COMPONENT_X * 0x55;
static const uint32_t YYYY_SWIZZLE = OPERAND_4_COMPONENT_Y * 0x55;
static const uint32_t ZZZZ_SWIZZLE = OPERAND_4_COMPONENT_Z * 0x55;
static const uint32_t WWWW_SWIZZLE = OPERAND_4_COMPONENT_W * 0x55;
|
||||
|
||||
// Returns the 2-bit source component selected for destination component
// `comp` (only the low two bits of comp are used). The swizzle lanes start at
// bit 4 of the token, two bits per component.
static uint32_t DecodeOperand4CompSwizzleSource(uint32_t ui32Token, uint32_t comp)
{
    const uint32_t lane = comp & 3;
    const uint32_t shift = 4 + 2 * lane;
    return (ui32Token >> shift) & 3;
}
|
||||
|
||||
// Resource dimensions.
enum RESOURCE_DIMENSION
{
    RESOURCE_DIMENSION_UNKNOWN = 0,
    RESOURCE_DIMENSION_BUFFER = 1,
    RESOURCE_DIMENSION_TEXTURE1D = 2,
    RESOURCE_DIMENSION_TEXTURE2D = 3,
    RESOURCE_DIMENSION_TEXTURE2DMS = 4,
    RESOURCE_DIMENSION_TEXTURE3D = 5,
    RESOURCE_DIMENSION_TEXTURECUBE = 6,
    RESOURCE_DIMENSION_TEXTURE1DARRAY = 7,
    RESOURCE_DIMENSION_TEXTURE2DARRAY = 8,
    RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 9,
    RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10,
    RESOURCE_DIMENSION_RAW_BUFFER = 11,
    RESOURCE_DIMENSION_STRUCTURED_BUFFER = 12,
};

// Returns bits 15..11 of the token as a RESOURCE_DIMENSION.
static RESOURCE_DIMENSION DecodeResourceDimension(uint32_t ui32Token)
{
    const uint32_t dimBits = (ui32Token >> 11) & 0x1fu;
    return static_cast<RESOURCE_DIMENSION>(dimBits);
}

// Returns bits 10..6 of an extended token as a RESOURCE_DIMENSION.
static RESOURCE_DIMENSION DecodeExtendedResourceDimension(uint32_t ui32Token)
{
    const uint32_t dimBits = (ui32Token >> 6) & 0x1fu;
    return static_cast<RESOURCE_DIMENSION>(dimBits);
}
|
||||
|
||||
// Which condition an instruction tests for.
enum INSTRUCTION_TEST_BOOLEAN
{
    INSTRUCTION_TEST_ZERO = 0,
    INSTRUCTION_TEST_NONZERO = 1
};

// Returns bit 18 of the instruction token as an INSTRUCTION_TEST_BOOLEAN.
static INSTRUCTION_TEST_BOOLEAN DecodeInstrTestBool(uint32_t ui32Token)
{
    const uint32_t testBit = (ui32Token >> 18) & 0x1u;
    return static_cast<INSTRUCTION_TEST_BOOLEAN>(testBit);
}
|
||||
|
||||
// Returns bit 31 of the operand token: 1 when the "extended" bit is set, else 0.
static uint32_t DecodeIsOperandExtended(uint32_t ui32Token)
{
    // Shifting a 32-bit unsigned value right by 31 leaves only the top bit.
    return ui32Token >> 31;
}
|
||||
|
||||
// Kinds of extended operand token.
enum EXTENDED_OPERAND_TYPE
{
    EXTENDED_OPERAND_EMPTY = 0,
    EXTENDED_OPERAND_MODIFIER = 1,
};

// Returns bits 5..0 of the token as an EXTENDED_OPERAND_TYPE.
static EXTENDED_OPERAND_TYPE DecodeExtendedOperandType(uint32_t ui32Token)
{
    const uint32_t typeBits = ui32Token & 0x3fu;
    return static_cast<EXTENDED_OPERAND_TYPE>(typeBits);
}
|
||||
|
||||
// Operand modifiers.
enum OPERAND_MODIFIER
{
    OPERAND_MODIFIER_NONE = 0,
    OPERAND_MODIFIER_NEG = 1,
    OPERAND_MODIFIER_ABS = 2,
    OPERAND_MODIFIER_ABSNEG = 3,
};

// Returns bits 13..6 of an extended operand token as an OPERAND_MODIFIER.
static OPERAND_MODIFIER DecodeExtendedOperandModifier(uint32_t ui32Token)
{
    const uint32_t modifierBits = (ui32Token >> 6) & 0xffu;
    return static_cast<OPERAND_MODIFIER>(modifierBits);
}
|
||||
|
||||
// Global flag bits, one bit each from bit 11 up to bit 18.
static const uint32_t GLOBAL_FLAG_REFACTORING_ALLOWED               = 0x00000800; // bit 11
static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS = 0x00001000; // bit 12
static const uint32_t GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL         = 0x00002000; // bit 13
static const uint32_t GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS = 0x00004000; // bit 14
static const uint32_t GLOBAL_FLAG_SKIP_OPTIMIZATION                 = 0x00008000; // bit 15
static const uint32_t GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION          = 0x00010000; // bit 16
static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS          = 0x00020000; // bit 17
static const uint32_t GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS          = 0x00040000; // bit 18

// Returns bits 23..11 of the token left in place (not shifted down), so the
// result can be tested directly against the GLOBAL_FLAG_* constants.
static uint32_t DecodeGlobalFlags(uint32_t ui32Token)
{
    const uint32_t flagsMask = 0x00fff800u;
    return ui32Token & flagsMask;
}
|
||||
|
||||
// Returns bits 14..11 of the token as an INTERPOLATION_MODE.
static INTERPOLATION_MODE DecodeInterpolationMode(uint32_t ui32Token)
{
    const uint32_t modeBits = (ui32Token >> 11) & 0xfu;
    return static_cast<INTERPOLATION_MODE>(modeBits);
}
|
||||
|
||||
|
||||
// Primitive topologies.
enum PRIMITIVE_TOPOLOGY
{
    PRIMITIVE_TOPOLOGY_UNDEFINED = 0,
    PRIMITIVE_TOPOLOGY_POINTLIST = 1,
    PRIMITIVE_TOPOLOGY_LINELIST = 2,
    PRIMITIVE_TOPOLOGY_LINESTRIP = 3,
    PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4,
    PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5,
    // 6 is reserved for legacy triangle fans

    // Each adjacency value equals (0x8 | the matching non-adjacency value):
    PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10,
    PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11,
    PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12,
    PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13,
};

// Returns bits 16..11 of the token as a PRIMITIVE_TOPOLOGY.
static PRIMITIVE_TOPOLOGY DecodeGSOutputPrimitiveTopology(uint32_t ui32Token)
{
    const uint32_t topologyBits = (ui32Token >> 11) & 0x3fu;
    return static_cast<PRIMITIVE_TOPOLOGY>(topologyBits);
}
|
||||
|
||||
// Primitive kinds. The N-control-point patch entries are numbered N + 7.
enum PRIMITIVE
{
    PRIMITIVE_UNDEFINED = 0,
    PRIMITIVE_POINT = 1,
    PRIMITIVE_LINE = 2,
    PRIMITIVE_TRIANGLE = 3,

    // Each adjacency value equals (0x4 | the matching non-adjacency value):
    PRIMITIVE_LINE_ADJ = 6,
    PRIMITIVE_TRIANGLE_ADJ = 7,

    PRIMITIVE_1_CONTROL_POINT_PATCH = 8,
    PRIMITIVE_2_CONTROL_POINT_PATCH = 9,
    PRIMITIVE_3_CONTROL_POINT_PATCH = 10,
    PRIMITIVE_4_CONTROL_POINT_PATCH = 11,
    PRIMITIVE_5_CONTROL_POINT_PATCH = 12,
    PRIMITIVE_6_CONTROL_POINT_PATCH = 13,
    PRIMITIVE_7_CONTROL_POINT_PATCH = 14,
    PRIMITIVE_8_CONTROL_POINT_PATCH = 15,
    PRIMITIVE_9_CONTROL_POINT_PATCH = 16,
    PRIMITIVE_10_CONTROL_POINT_PATCH = 17,
    PRIMITIVE_11_CONTROL_POINT_PATCH = 18,
    PRIMITIVE_12_CONTROL_POINT_PATCH = 19,
    PRIMITIVE_13_CONTROL_POINT_PATCH = 20,
    PRIMITIVE_14_CONTROL_POINT_PATCH = 21,
    PRIMITIVE_15_CONTROL_POINT_PATCH = 22,
    PRIMITIVE_16_CONTROL_POINT_PATCH = 23,
    PRIMITIVE_17_CONTROL_POINT_PATCH = 24,
    PRIMITIVE_18_CONTROL_POINT_PATCH = 25,
    PRIMITIVE_19_CONTROL_POINT_PATCH = 26,
    PRIMITIVE_20_CONTROL_POINT_PATCH = 27,
    PRIMITIVE_21_CONTROL_POINT_PATCH = 28,
    PRIMITIVE_22_CONTROL_POINT_PATCH = 29,
    PRIMITIVE_23_CONTROL_POINT_PATCH = 30,
    PRIMITIVE_24_CONTROL_POINT_PATCH = 31,
    PRIMITIVE_25_CONTROL_POINT_PATCH = 32,
    PRIMITIVE_26_CONTROL_POINT_PATCH = 33,
    PRIMITIVE_27_CONTROL_POINT_PATCH = 34,
    PRIMITIVE_28_CONTROL_POINT_PATCH = 35,
    PRIMITIVE_29_CONTROL_POINT_PATCH = 36,
    PRIMITIVE_30_CONTROL_POINT_PATCH = 37,
    PRIMITIVE_31_CONTROL_POINT_PATCH = 38,
    PRIMITIVE_32_CONTROL_POINT_PATCH = 39,
};

// Returns bits 16..11 of the token as a PRIMITIVE.
static PRIMITIVE DecodeGSInputPrimitive(uint32_t ui32Token)
{
    const uint32_t primitiveBits = (ui32Token >> 11) & 0x3fu;
    return static_cast<PRIMITIVE>(primitiveBits);
}
|
||||
|
||||
// Returns bits 13..11 of the token as a TESSELLATOR_PARTITIONING.
static TESSELLATOR_PARTITIONING DecodeTessPartitioning(uint32_t ui32Token)
{
    const uint32_t partitioningBits = (ui32Token >> 11) & 0x7u;
    return static_cast<TESSELLATOR_PARTITIONING>(partitioningBits);
}
|
||||
|
||||
// Tessellator domains.
enum TESSELLATOR_DOMAIN
{
    TESSELLATOR_DOMAIN_UNDEFINED = 0,
    TESSELLATOR_DOMAIN_ISOLINE = 1,
    TESSELLATOR_DOMAIN_TRI = 2,
    TESSELLATOR_DOMAIN_QUAD = 3
};

// Returns bits 12..11 of the token as a TESSELLATOR_DOMAIN.
static TESSELLATOR_DOMAIN DecodeTessDomain(uint32_t ui32Token)
{
    const uint32_t domainBits = (ui32Token >> 11) & 0x3u;
    return static_cast<TESSELLATOR_DOMAIN>(domainBits);
}
|
||||
|
||||
// Extracts the three-bit output-primitive field stored in bits 11..13 of the token.
static TESSELLATOR_OUTPUT_PRIMITIVE DecodeTessOutPrim(uint32_t ui32Token)
{
    const uint32_t outputPrimitiveBits = (ui32Token >> 11) & 0x7u;
    return static_cast<TESSELLATOR_OUTPUT_PRIMITIVE>(outputPrimitiveBits);
}
|
||||
|
||||
// Individual sync flag bits; together they cover bits 11..14 of a token.
static const uint32_t SYNC_THREADS_IN_GROUP                    = 1u << 11;
static const uint32_t SYNC_THREAD_GROUP_SHARED_MEMORY          = 1u << 12;
static const uint32_t SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP  = 1u << 13;
static const uint32_t SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL = 1u << 14;
|
||||
|
||||
// Returns the sync flag bits (bits 11..14) of the token, left in place (not shifted down).
static uint32_t DecodeSyncFlags(uint32_t ui32Token)
{
    const uint32_t syncFlagBits = 0x00007800;
    return ui32Token & syncFlagBits;
}
|
||||
|
||||
// The number of types that implement this interface.
// Stored in the low 16 bits of the token.
static uint32_t DecodeInterfaceTableLength(uint32_t ui32Token)
{
    const uint32_t lowHalfMask = 0x0000ffff;
    return ui32Token & lowHalfMask;
}
|
||||
|
||||
// The number of interfaces that are defined in this array.
// Stored in the high 16 bits of the token.
static uint32_t DecodeInterfaceArrayLength(uint32_t ui32Token)
{
    return (ui32Token >> 16) & 0xffffu;
}
|
||||
|
||||
// Classes of custom-data block; values are spelled out explicitly.
typedef enum CUSTOMDATA_CLASS
{
    CUSTOMDATA_COMMENT                       = 0,
    CUSTOMDATA_DEBUGINFO                     = 1,
    CUSTOMDATA_OPAQUE                        = 2,
    CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER = 3,
    CUSTOMDATA_SHADER_MESSAGE                = 4,
} CUSTOMDATA_CLASS;

// The class occupies every bit of the token from bit 11 upwards.
static CUSTOMDATA_CLASS DecodeCustomDataClass(uint32_t ui32Token)
{
    const uint32_t classBits = ui32Token >> 11;
    return static_cast<CUSTOMDATA_CLASS>(classBits);
}
|
||||
|
||||
// Returns 1 when bit 13 (the saturate bit) of the token is set, otherwise 0.
static uint32_t DecodeInstructionSaturate(uint32_t ui32Token)
{
    return (ui32Token >> 13) & 1u;
}
|
||||
|
||||
// Minimum-precision hint attached to an operand.
typedef enum OPERAND_MIN_PRECISION
{
    // Default precision for the shader model
    OPERAND_MIN_PRECISION_DEFAULT   = 0,
    // Min 16 bit/component float
    OPERAND_MIN_PRECISION_FLOAT_16  = 1,
    // Min 10(2.8)bit/comp. float
    OPERAND_MIN_PRECISION_FLOAT_2_8 = 2,
    // Min 16 bit/comp. signed integer
    OPERAND_MIN_PRECISION_SINT_16   = 4,
    // Min 16 bit/comp. unsigned integer
    OPERAND_MIN_PRECISION_UINT_16   = 5,
} OPERAND_MIN_PRECISION;
|
||||
|
||||
// Returns the three-bit minimum-precision field stored in bits 14..16 of the token.
static uint32_t DecodeOperandMinPrecision(uint32_t ui32Token)
{
    return (ui32Token >> 14) & 0x7u;
}
|
||||
|
||||
// Returns the six-bit control point count stored in bits 11..16 of the token.
static uint32_t DecodeOutputControlPointCount(uint32_t ui32Token)
{
    return (ui32Token >> 11) & 0x3fu;
}
|
||||
|
||||
// Selects which coordinate's immediate offset is addressed.
typedef enum IMMEDIATE_ADDRESS_OFFSET_COORD
{
    IMMEDIATE_ADDRESS_OFFSET_U = 0,
    IMMEDIATE_ADDRESS_OFFSET_V = 1,
    IMMEDIATE_ADDRESS_OFFSET_W = 2,
} IMMEDIATE_ADDRESS_OFFSET_COORD;

// Each coordinate owns a four-bit field; U starts at bit 9, V at bit 13, W at bit 17.
#define IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord) (9 + 4 * ((Coord) & 3))
#define IMMEDIATE_ADDRESS_OFFSET_MASK(Coord)  (0x0000000f << IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord))

// Returns the raw four-bit field for eCoord; no sign extension is applied here.
static uint32_t DecodeImmediateAddressOffset(IMMEDIATE_ADDRESS_OFFSET_COORD eCoord, uint32_t ui32Token)
{
    const int fieldShift = IMMEDIATE_ADDRESS_OFFSET_SHIFT(eCoord);
    return (ui32Token >> fieldShift) & 0xfu;
}
|
||||
|
||||
// UAV access scope flags
static const uint32_t GLOBALLY_COHERENT_ACCESS = 1u << 16;

// Returns the coherency bit (bit 16) of the token, left in place (not shifted down).
static uint32_t DecodeAccessCoherencyFlags(uint32_t ui32Token)
{
    return ui32Token & GLOBALLY_COHERENT_ACCESS;
}
|
||||
|
||||
|
||||
// Return type requested by a resinfo instruction.
typedef enum RESINFO_RETURN_TYPE
{
    RESINFO_INSTRUCTION_RETURN_FLOAT    = 0,
    RESINFO_INSTRUCTION_RETURN_RCPFLOAT = 1,
    RESINFO_INSTRUCTION_RETURN_UINT     = 2
} RESINFO_RETURN_TYPE;

// Extracts the two-bit return-type field stored in bits 11..12 of the token.
static RESINFO_RETURN_TYPE DecodeResInfoReturnType(uint32_t ui32Token)
{
    const uint32_t returnTypeBits = (ui32Token >> 11) & 0x3u;
    return static_cast<RESINFO_RETURN_TYPE>(returnTypeBits);
}
|
||||
|
||||
#endif
|
600
src/reflect.cpp
Normal file
600
src/reflect.cpp
Normal file
@ -0,0 +1,600 @@
|
||||
|
||||
#include "internal_includes/reflect.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/decode.h"
|
||||
#include "bstrlib.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
// Rewrites a leading '$' to '_' so the name can be used as a GLSL identifier.
//
// MSDN http://msdn.microsoft.com/en-us/library/windows/desktop/bb944006(v=vs.85).aspx :
// uniform function parameters appear in the constant table prepended with a
// dollar sign ($), unlike global variables, to avoid name collisions between
// local uniform inputs and global variables of the same name.
//
// $ThisPointer, $Element and $Globals are left as-is; every other name has
// only its first character replaced ($ is not valid in GLSL variable names).
static void FormatVariableName(std::string & Name)
{
    if (Name[0] != '$')
        return;

    static const char* const kReservedNames[] = { "$Element", "$Globals", "$ThisPointer" };
    for (const char* reserved : kReservedNames)
    {
        if (strcmp(Name.c_str(), reserved) == 0)
            return;
    }

    Name[0] = '_';
}
|
||||
|
||||
// Interprets the bytes at `tokens` as a NUL-terminated C string and returns a copy.
static std::string ReadStringFromTokenStream(const uint32_t* tokens)
{
    const char* const text = reinterpret_cast<const char*>(tokens);
    return std::string(text);
}
|
||||
|
||||
// Returns the index of the lowest set bit of `mask`, i.e. how many low
// components are absent before the first one that is present.
//
// An empty mask yields 0. Without that guard the scan below would never find
// a set bit and would spin forever while overflowing the counter.
static int MaskToRebaseOffset(const uint32_t mask)
{
    if (mask == 0)
        return 0;

    int offset = 0;
    for (uint32_t bits = mask; (bits & 1u) == 0; bits >>= 1)
        ++offset;

    return offset;
}
|
||||
|
||||
static void ReadInputSignatures(const uint32_t* pui32Tokens,
|
||||
ShaderInfo* psShaderInfo,
|
||||
const int extended)
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
const uint32_t* pui32FirstSignatureToken = pui32Tokens;
|
||||
const uint32_t ui32ElementCount = *pui32Tokens++;
|
||||
/* const uint32_t ui32Key = * */ pui32Tokens++;
|
||||
|
||||
psShaderInfo->psInputSignatures.clear();
|
||||
psShaderInfo->psInputSignatures.resize(ui32ElementCount);
|
||||
|
||||
for(i=0; i<ui32ElementCount; ++i)
|
||||
{
|
||||
uint32_t ui32ComponentMasks;
|
||||
ShaderInfo::InOutSignature* psCurrentSignature = &psShaderInfo->psInputSignatures[i];
|
||||
uint32_t ui32SemanticNameOffset;
|
||||
|
||||
psCurrentSignature->ui32Stream = 0;
|
||||
psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT;
|
||||
|
||||
if(extended)
|
||||
psCurrentSignature->ui32Stream = *pui32Tokens++;
|
||||
|
||||
ui32SemanticNameOffset = *pui32Tokens++;
|
||||
psCurrentSignature->ui32SemanticIndex = *pui32Tokens++;
|
||||
psCurrentSignature->eSystemValueType = (SPECIAL_NAME) *pui32Tokens++;
|
||||
psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE) *pui32Tokens++;
|
||||
psCurrentSignature->ui32Register = *pui32Tokens++;
|
||||
|
||||
ui32ComponentMasks = *pui32Tokens++;
|
||||
psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F;
|
||||
//Shows which components are read
|
||||
psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8;
|
||||
psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask);
|
||||
|
||||
if(extended)
|
||||
psCurrentSignature->eMinPrec = (MIN_PRECISION) *pui32Tokens++;
|
||||
|
||||
psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken+ui32SemanticNameOffset));
|
||||
}
|
||||
}
|
||||
|
||||
static void ReadOutputSignatures(const uint32_t* pui32Tokens,
|
||||
ShaderInfo* psShaderInfo,
|
||||
const int minPrec,
|
||||
const int streams)
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
const uint32_t* pui32FirstSignatureToken = pui32Tokens;
|
||||
const uint32_t ui32ElementCount = *pui32Tokens++;
|
||||
/*const uint32_t ui32Key = * */ pui32Tokens++;
|
||||
|
||||
psShaderInfo->psOutputSignatures.clear();
|
||||
psShaderInfo->psOutputSignatures.resize(ui32ElementCount);
|
||||
|
||||
for(i=0; i<ui32ElementCount; ++i)
|
||||
{
|
||||
uint32_t ui32ComponentMasks;
|
||||
ShaderInfo::InOutSignature* psCurrentSignature = &psShaderInfo->psOutputSignatures[i];
|
||||
uint32_t ui32SemanticNameOffset;
|
||||
|
||||
psCurrentSignature->ui32Stream = 0;
|
||||
psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT;
|
||||
|
||||
if(streams)
|
||||
psCurrentSignature->ui32Stream = *pui32Tokens++;
|
||||
|
||||
ui32SemanticNameOffset = *pui32Tokens++;
|
||||
psCurrentSignature->ui32SemanticIndex = *pui32Tokens++;
|
||||
psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++;
|
||||
psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE) *pui32Tokens++;
|
||||
psCurrentSignature->ui32Register = *pui32Tokens++;
|
||||
|
||||
// Massage some special inputs/outputs to match the types of GLSL counterparts
|
||||
if (psCurrentSignature->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX)
|
||||
{
|
||||
psCurrentSignature->eComponentType = INOUT_COMPONENT_SINT32;
|
||||
}
|
||||
|
||||
ui32ComponentMasks = *pui32Tokens++;
|
||||
psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F;
|
||||
//Shows which components are NEVER written.
|
||||
psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8;
|
||||
psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask);
|
||||
|
||||
if(minPrec)
|
||||
psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++;
|
||||
|
||||
psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset));
|
||||
}
|
||||
}
|
||||
|
||||
static void ReadPatchConstantSignatures(const uint32_t* pui32Tokens,
|
||||
ShaderInfo* psShaderInfo,
|
||||
const int minPrec,
|
||||
const int streams)
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
const uint32_t* pui32FirstSignatureToken = pui32Tokens;
|
||||
const uint32_t ui32ElementCount = *pui32Tokens++;
|
||||
/*const uint32_t ui32Key = * */ pui32Tokens++;
|
||||
|
||||
psShaderInfo->psPatchConstantSignatures.clear();
|
||||
psShaderInfo->psPatchConstantSignatures.resize(ui32ElementCount);
|
||||
|
||||
for(i=0; i<ui32ElementCount; ++i)
|
||||
{
|
||||
uint32_t ui32ComponentMasks;
|
||||
ShaderInfo::InOutSignature* psCurrentSignature = &psShaderInfo->psPatchConstantSignatures[i];
|
||||
uint32_t ui32SemanticNameOffset;
|
||||
|
||||
psCurrentSignature->ui32Stream = 0;
|
||||
psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT;
|
||||
|
||||
if(streams)
|
||||
psCurrentSignature->ui32Stream = *pui32Tokens++;
|
||||
|
||||
ui32SemanticNameOffset = *pui32Tokens++;
|
||||
psCurrentSignature->ui32SemanticIndex = *pui32Tokens++;
|
||||
psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++;
|
||||
psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE) *pui32Tokens++;
|
||||
psCurrentSignature->ui32Register = *pui32Tokens++;
|
||||
|
||||
// Massage some special inputs/outputs to match the types of GLSL counterparts
|
||||
if (psCurrentSignature->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX)
|
||||
{
|
||||
psCurrentSignature->eComponentType = INOUT_COMPONENT_SINT32;
|
||||
}
|
||||
|
||||
ui32ComponentMasks = *pui32Tokens++;
|
||||
psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F;
|
||||
//Shows which components are NEVER written.
|
||||
psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8;
|
||||
psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask);
|
||||
|
||||
if(minPrec)
|
||||
psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++;
|
||||
|
||||
psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset));
|
||||
}
|
||||
}
|
||||
|
||||
static const uint32_t* ReadResourceBinding(const uint32_t* pui32FirstResourceToken, const uint32_t* pui32Tokens, ResourceBinding* psBinding, uint32_t decodeFlags)
|
||||
{
|
||||
uint32_t ui32NameOffset = *pui32Tokens++;
|
||||
|
||||
psBinding->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstResourceToken+ui32NameOffset));
|
||||
FormatVariableName(psBinding->name);
|
||||
|
||||
psBinding->eType = (ResourceType)*pui32Tokens++;
|
||||
psBinding->ui32ReturnType = (RESOURCE_RETURN_TYPE)*pui32Tokens++;
|
||||
psBinding->eDimension = (REFLECT_RESOURCE_DIMENSION)*pui32Tokens++;
|
||||
psBinding->ui32NumSamples = *pui32Tokens++;
|
||||
psBinding->ui32BindPoint = *pui32Tokens++;
|
||||
psBinding->ui32BindCount = *pui32Tokens++;
|
||||
psBinding->ui32Flags = *pui32Tokens++;
|
||||
psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_UNKNOWN;
|
||||
|
||||
if (decodeFlags & HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME)
|
||||
{
|
||||
if (psBinding->name.rfind("_highp") == psBinding->name.length() - 6)
|
||||
{
|
||||
psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_HIGHP;
|
||||
psBinding->name.resize(psBinding->name.length() - 6);
|
||||
}
|
||||
else if (psBinding->name.rfind("_mediump") == psBinding->name.length() - 8)
|
||||
{
|
||||
psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_MEDIUMP;
|
||||
psBinding->name.resize(psBinding->name.length() - 8);
|
||||
}
|
||||
else if (psBinding->name.rfind("_lowp") == psBinding->name.length() - 5)
|
||||
{
|
||||
psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_LOWP;
|
||||
psBinding->name.resize(psBinding->name.length() - 5);
|
||||
}
|
||||
}
|
||||
|
||||
return pui32Tokens;
|
||||
}
|
||||
|
||||
//Read D3D11_SHADER_TYPE_DESC
|
||||
static void ReadShaderVariableType(const uint32_t ui32MajorVersion,
|
||||
const uint32_t* pui32FirstConstBufToken,
|
||||
const uint32_t* pui32tokens, ShaderVarType* varType)
|
||||
{
|
||||
const uint16_t* pui16Tokens = (const uint16_t*) pui32tokens;
|
||||
uint16_t ui32MemberCount;
|
||||
uint32_t ui32MemberOffset;
|
||||
const uint32_t* pui32MemberTokens;
|
||||
uint32_t i;
|
||||
|
||||
varType->Class = (SHADER_VARIABLE_CLASS)pui16Tokens[0];
|
||||
varType->Type = (SHADER_VARIABLE_TYPE)pui16Tokens[1];
|
||||
varType->Rows = pui16Tokens[2];
|
||||
varType->Columns = pui16Tokens[3];
|
||||
varType->Elements = pui16Tokens[4];
|
||||
|
||||
varType->MemberCount = ui32MemberCount = pui16Tokens[5];
|
||||
varType->Members.clear();
|
||||
|
||||
if(varType->ParentCount)
|
||||
{
|
||||
// Add empty brackets for array parents. Indices are filled in later in the printing codes.
|
||||
if (varType->Parent->Elements > 1)
|
||||
varType->fullName = varType->Parent->fullName + "[]." + varType->name;
|
||||
else
|
||||
varType->fullName = varType->Parent->fullName + "." + varType->name;
|
||||
}
|
||||
|
||||
if(ui32MemberCount)
|
||||
{
|
||||
varType->Members.resize(ui32MemberCount);
|
||||
|
||||
ui32MemberOffset = pui32tokens[3];
|
||||
|
||||
pui32MemberTokens = (const uint32_t*)((const char*)pui32FirstConstBufToken+ui32MemberOffset);
|
||||
|
||||
for(i=0; i< ui32MemberCount; ++i)
|
||||
{
|
||||
uint32_t ui32NameOffset = *pui32MemberTokens++;
|
||||
uint32_t ui32MemberTypeOffset = *pui32MemberTokens++;
|
||||
|
||||
varType->Members[i].Parent = varType;
|
||||
varType->Members[i].ParentCount = varType->ParentCount + 1;
|
||||
|
||||
varType->Members[i].Offset = *pui32MemberTokens++;
|
||||
|
||||
varType->Members[i].name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstConstBufToken + ui32NameOffset));
|
||||
|
||||
ReadShaderVariableType(ui32MajorVersion, pui32FirstConstBufToken,
|
||||
(const uint32_t*)((const char*)pui32FirstConstBufToken+ui32MemberTypeOffset), &varType->Members[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reads one constant buffer description starting at pui32Tokens into psBuffer
// and returns the token pointer just past the description.
//
// The description holds a name offset, a variable count and a variable-table
// offset, followed by the total size, flags and buffer type. Every offset is
// in bytes from pui32FirstConstBufToken. For each variable the name, start
// offset, size, type (via ReadShaderVariableType) and optional default values
// are stored; the per-variable flags, the buffer flags and the buffer type
// are skipped because nothing here uses them.
static const uint32_t* ReadConstantBuffer(ShaderInfo* psShaderInfo,
    const uint32_t* pui32FirstConstBufToken, const uint32_t* pui32Tokens, ConstantBuffer* psBuffer)
{
    const char* const chunkBase = (const char*)pui32FirstConstBufToken;

    const uint32_t ui32BufferNameOffset = *pui32Tokens++;
    const uint32_t ui32VarCount = *pui32Tokens++;
    const uint32_t ui32VarOffset = *pui32Tokens++;
    const uint32_t* pui32VarToken = (const uint32_t*)(chunkBase + ui32VarOffset);

    psBuffer->name = ReadStringFromTokenStream((const uint32_t*)(chunkBase + ui32BufferNameOffset));
    FormatVariableName(psBuffer->name);

    psBuffer->asVars.clear();
    psBuffer->asVars.resize(ui32VarCount);

    for (uint32_t i = 0; i < ui32VarCount; ++i)
    {
        //D3D11_SHADER_VARIABLE_DESC
        ShaderVar* const psVar = &psBuffer->asVars[i];

        const uint32_t ui32VarNameOffset = *pui32VarToken++;
        psVar->name = ReadStringFromTokenStream((const uint32_t*)(chunkBase + ui32VarNameOffset));
        FormatVariableName(psVar->name);

        psVar->ui32StartOffset = *pui32VarToken++;
        psVar->ui32Size = *pui32VarToken++;
        pui32VarToken++; // variable flags, unused
        const uint32_t ui32TypeOffset = *pui32VarToken++;

        // Seed the root type node; ReadShaderVariableType fills in the rest.
        psVar->sType.name = psVar->name;
        psVar->sType.fullName = psVar->name;
        psVar->sType.Parent = 0;
        psVar->sType.ParentCount = 0;
        psVar->sType.Offset = 0;
        psVar->sType.m_IsUsed = false;

        ReadShaderVariableType(psShaderInfo->ui32MajorVersion, pui32FirstConstBufToken,
            (const uint32_t*)(chunkBase + ui32TypeOffset), &psVar->sType);

        const uint32_t ui32DefaultValueOffset = *pui32VarToken++;

        if (psShaderInfo->ui32MajorVersion >= 5)
        {
            // StartTexture, TextureSize, StartSampler, SamplerSize: present but unused.
            pui32VarToken += 4;
        }

        psVar->haveDefaultValue = 0;

        if (ui32DefaultValueOffset)
        {
            const uint32_t ui32NumDefaultValues = psVar->ui32Size / 4;
            const uint32_t* pui32DefaultValToken = (const uint32_t*)(chunkBase + ui32DefaultValueOffset);

            //Always a sequence of 4-bytes at the moment.
            //bool const becomes 0 or 0xFFFFFFFF int, int & float are 4-bytes.
            ASSERT(psVar->ui32Size % 4 == 0);

            psVar->haveDefaultValue = 1;

            psVar->pui32DefaultValues.clear();
            psVar->pui32DefaultValues.resize(ui32NumDefaultValues);

            for (uint32_t j = 0; j < ui32NumDefaultValues; ++j)
            {
                psVar->pui32DefaultValues[j] = pui32DefaultValToken[j];
            }
        }
    }

    psBuffer->ui32TotalSizeInBytes = *pui32Tokens++;
    pui32Tokens += 2; // buffer flags and buffer type, unused

    return pui32Tokens;
}
|
||||
|
||||
static void ReadResources(const uint32_t* pui32Tokens,//in
|
||||
ShaderInfo* psShaderInfo, //out
|
||||
uint32_t decodeFlags)
|
||||
{
|
||||
ResourceBinding* psResBindings;
|
||||
ConstantBuffer* psConstantBuffers;
|
||||
const uint32_t* pui32ConstantBuffers;
|
||||
const uint32_t* pui32ResourceBindings;
|
||||
const uint32_t* pui32FirstToken = pui32Tokens;
|
||||
uint32_t i;
|
||||
|
||||
const uint32_t ui32NumConstantBuffers = *pui32Tokens++;
|
||||
const uint32_t ui32ConstantBufferOffset = *pui32Tokens++;
|
||||
|
||||
uint32_t ui32NumResourceBindings = *pui32Tokens++;
|
||||
uint32_t ui32ResourceBindingOffset = *pui32Tokens++;
|
||||
/*uint32_t ui32ShaderModel = * */ pui32Tokens++;
|
||||
/*uint32_t ui32CompileFlags = * */ pui32Tokens++;//D3DCompile flags? http://msdn.microsoft.com/en-us/library/gg615083(v=vs.85).aspx
|
||||
|
||||
//Resources
|
||||
pui32ResourceBindings = (const uint32_t*)((const char*)pui32FirstToken + ui32ResourceBindingOffset);
|
||||
|
||||
psShaderInfo->psResourceBindings.clear();
|
||||
psShaderInfo->psResourceBindings.resize(ui32NumResourceBindings);
|
||||
psResBindings = &psShaderInfo->psResourceBindings[0];
|
||||
|
||||
for(i=0; i < ui32NumResourceBindings; ++i)
|
||||
{
|
||||
pui32ResourceBindings = ReadResourceBinding(pui32FirstToken, pui32ResourceBindings, psResBindings+i, decodeFlags);
|
||||
ASSERT(psResBindings[i].ui32BindPoint < MAX_RESOURCE_BINDINGS);
|
||||
}
|
||||
|
||||
//Constant buffers
|
||||
pui32ConstantBuffers = (const uint32_t*)((const char*)pui32FirstToken + ui32ConstantBufferOffset);
|
||||
|
||||
psShaderInfo->psConstantBuffers.clear();
|
||||
psShaderInfo->psConstantBuffers.resize(ui32NumConstantBuffers);
|
||||
psConstantBuffers = &psShaderInfo->psConstantBuffers[0];
|
||||
|
||||
for(i=0; i < ui32NumConstantBuffers; ++i)
|
||||
{
|
||||
pui32ConstantBuffers = ReadConstantBuffer(psShaderInfo, pui32FirstToken, pui32ConstantBuffers, psConstantBuffers+i);
|
||||
}
|
||||
|
||||
|
||||
//Map resource bindings to constant buffers
|
||||
if(psShaderInfo->psConstantBuffers.size())
|
||||
{
|
||||
for(i=0; i < ui32NumResourceBindings; ++i)
|
||||
{
|
||||
ResourceGroup eRGroup;
|
||||
uint32_t cbufIndex = 0;
|
||||
|
||||
eRGroup = ShaderInfo::ResourceTypeToResourceGroup(psResBindings[i].eType);
|
||||
|
||||
//Find the constant buffer whose name matches the resource at the given resource binding point
|
||||
for(cbufIndex=0; cbufIndex < psShaderInfo->psConstantBuffers.size(); cbufIndex++)
|
||||
{
|
||||
if(psConstantBuffers[cbufIndex].name == psResBindings[i].name)
|
||||
{
|
||||
psShaderInfo->aui32ResourceMap[eRGroup][psResBindings[i].ui32BindPoint] = cbufIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static const uint16_t* ReadClassType(const uint32_t* pui32FirstInterfaceToken, const uint16_t* pui16Tokens, ClassType* psClassType)
|
||||
{
|
||||
const uint32_t* pui32Tokens = (const uint32_t*)pui16Tokens;
|
||||
uint32_t ui32NameOffset = *pui32Tokens;
|
||||
pui16Tokens+= 2;
|
||||
|
||||
psClassType->ui16ID = *pui16Tokens++;
|
||||
psClassType->ui16ConstBufStride = *pui16Tokens++;
|
||||
psClassType->ui16Texture = *pui16Tokens++;
|
||||
psClassType->ui16Sampler = *pui16Tokens++;
|
||||
|
||||
psClassType->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32NameOffset));
|
||||
|
||||
return pui16Tokens;
|
||||
}
|
||||
|
||||
static const uint16_t* ReadClassInstance(const uint32_t* pui32FirstInterfaceToken, const uint16_t* pui16Tokens, ClassInstance* psClassInstance)
|
||||
{
|
||||
uint32_t ui32NameOffset = *pui16Tokens++ << 16;
|
||||
ui32NameOffset |= *pui16Tokens++;
|
||||
|
||||
psClassInstance->ui16ID = *pui16Tokens++;
|
||||
psClassInstance->ui16ConstBuf = *pui16Tokens++;
|
||||
psClassInstance->ui16ConstBufOffset = *pui16Tokens++;
|
||||
psClassInstance->ui16Texture = *pui16Tokens++;
|
||||
psClassInstance->ui16Sampler = *pui16Tokens++;
|
||||
|
||||
psClassInstance->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32NameOffset));
|
||||
|
||||
return pui16Tokens;
|
||||
}
|
||||
|
||||
|
||||
static void ReadInterfaces(const uint32_t* pui32Tokens,
|
||||
ShaderInfo* psShaderInfo)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t ui32StartSlot;
|
||||
const uint32_t* pui32FirstInterfaceToken = pui32Tokens;
|
||||
const uint32_t ui32ClassInstanceCount = *pui32Tokens++;
|
||||
const uint32_t ui32ClassTypeCount = *pui32Tokens++;
|
||||
const uint32_t ui32InterfaceSlotRecordCount = *pui32Tokens++;
|
||||
/*const uint32_t ui32InterfaceSlotCount = * */ pui32Tokens++;
|
||||
const uint32_t ui32ClassInstanceOffset = *pui32Tokens++;
|
||||
const uint32_t ui32ClassTypeOffset = *pui32Tokens++;
|
||||
const uint32_t ui32InterfaceSlotOffset = *pui32Tokens++;
|
||||
|
||||
const uint16_t* pui16ClassTypes = (const uint16_t*)((const char*)pui32FirstInterfaceToken + ui32ClassTypeOffset);
|
||||
const uint16_t* pui16ClassInstances = (const uint16_t*)((const char*)pui32FirstInterfaceToken + ui32ClassInstanceOffset);
|
||||
const uint32_t* pui32InterfaceSlots = (const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32InterfaceSlotOffset);
|
||||
|
||||
const uint32_t* pui32InterfaceSlotTokens = pui32InterfaceSlots;
|
||||
|
||||
ClassType* psClassTypes;
|
||||
ClassInstance* psClassInstances;
|
||||
|
||||
psShaderInfo->psClassTypes.clear();
|
||||
psShaderInfo->psClassTypes.resize(ui32ClassTypeCount);
|
||||
psClassTypes = &psShaderInfo->psClassTypes[0];
|
||||
|
||||
for(i=0; i<ui32ClassTypeCount; ++i)
|
||||
{
|
||||
pui16ClassTypes = ReadClassType(pui32FirstInterfaceToken, pui16ClassTypes, psClassTypes+i);
|
||||
psClassTypes[i].ui16ID = (uint16_t)i;
|
||||
}
|
||||
|
||||
psShaderInfo->psClassInstances.clear();
|
||||
psShaderInfo->psClassInstances.resize(ui32ClassInstanceCount);
|
||||
psClassInstances = &psShaderInfo->psClassInstances[0];
|
||||
|
||||
for(i=0; i<ui32ClassInstanceCount; ++i)
|
||||
{
|
||||
pui16ClassInstances = ReadClassInstance(pui32FirstInterfaceToken, pui16ClassInstances, psClassInstances+i);
|
||||
}
|
||||
|
||||
//Slots map function table to $ThisPointer cbuffer variable index
|
||||
ui32StartSlot = 0;
|
||||
for(i=0; i<ui32InterfaceSlotRecordCount;++i)
|
||||
{
|
||||
uint32_t k;
|
||||
|
||||
const uint32_t ui32SlotSpan = *pui32InterfaceSlotTokens++;
|
||||
const uint32_t ui32Count = *pui32InterfaceSlotTokens++;
|
||||
const uint32_t ui32TypeIDOffset = *pui32InterfaceSlotTokens++;
|
||||
const uint32_t ui32TableIDOffset = *pui32InterfaceSlotTokens++;
|
||||
|
||||
const uint16_t* pui16TypeID = (const uint16_t*)((const char*)pui32FirstInterfaceToken+ui32TypeIDOffset);
|
||||
const uint32_t* pui32TableID = (const uint32_t*)((const char*)pui32FirstInterfaceToken+ui32TableIDOffset);
|
||||
|
||||
for(k=0; k < ui32Count; ++k)
|
||||
{
|
||||
psShaderInfo->aui32TableIDToTypeID[*pui32TableID++] = *pui16TypeID++;
|
||||
}
|
||||
|
||||
ui32StartSlot += ui32SlotSpan;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void LoadShaderInfo(const uint32_t ui32MajorVersion,
|
||||
const uint32_t ui32MinorVersion,
|
||||
const ReflectionChunks* psChunks,
|
||||
ShaderInfo* psInfo,
|
||||
uint32_t decodeFlags)
|
||||
{
|
||||
const uint32_t* pui32Inputs = psChunks->pui32Inputs;
|
||||
const uint32_t* pui32Inputs11 = psChunks->pui32Inputs11;
|
||||
const uint32_t* pui32Resources = psChunks->pui32Resources;
|
||||
const uint32_t* pui32Interfaces = psChunks->pui32Interfaces;
|
||||
const uint32_t* pui32Outputs = psChunks->pui32Outputs;
|
||||
const uint32_t* pui32Outputs11 = psChunks->pui32Outputs11;
|
||||
const uint32_t* pui32OutputsWithStreams = psChunks->pui32OutputsWithStreams;
|
||||
const uint32_t* pui32PatchConstants = psChunks->pui32PatchConstants;
|
||||
const uint32_t* pui32PatchConstants11 = psChunks->pui32PatchConstants11;
|
||||
|
||||
psInfo->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED;
|
||||
psInfo->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED;
|
||||
|
||||
psInfo->ui32MajorVersion = ui32MajorVersion;
|
||||
psInfo->ui32MinorVersion = ui32MinorVersion;
|
||||
|
||||
|
||||
if(pui32Inputs)
|
||||
ReadInputSignatures(pui32Inputs, psInfo, 0);
|
||||
if(pui32Inputs11)
|
||||
ReadInputSignatures(pui32Inputs11, psInfo, 1);
|
||||
if(pui32Resources)
|
||||
ReadResources(pui32Resources, psInfo, decodeFlags);
|
||||
if(pui32Interfaces)
|
||||
ReadInterfaces(pui32Interfaces, psInfo);
|
||||
if(pui32Outputs)
|
||||
ReadOutputSignatures(pui32Outputs, psInfo, 0, 0);
|
||||
if(pui32Outputs11)
|
||||
ReadOutputSignatures(pui32Outputs11, psInfo, 1, 1);
|
||||
if(pui32OutputsWithStreams)
|
||||
ReadOutputSignatures(pui32OutputsWithStreams, psInfo, 0, 1);
|
||||
if(pui32PatchConstants)
|
||||
ReadPatchConstantSignatures(pui32PatchConstants, psInfo, 0, 0);
|
||||
if (pui32PatchConstants11)
|
||||
ReadPatchConstantSignatures(pui32PatchConstants11, psInfo, 1, 1);
|
||||
|
||||
{
|
||||
uint32_t i;
|
||||
for(i=0; i<psInfo->psConstantBuffers.size();++i)
|
||||
{
|
||||
if (psInfo->psConstantBuffers[i].name == "$ThisPointer")
|
||||
{
|
||||
psInfo->psThisPointerConstBuffer = &psInfo->psConstantBuffers[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
806
src/toGLSL.cpp
Normal file
806
src/toGLSL.cpp
Normal file
@ -0,0 +1,806 @@
|
||||
#include <memory>
|
||||
|
||||
#include "internal_includes/tokens.h"
|
||||
#include "internal_includes/decode.h"
|
||||
#include "stdlib.h"
|
||||
#include "stdio.h"
|
||||
#include "bstrlib.h"
|
||||
#include "internal_includes/toGLSL.h"
|
||||
#include "internal_includes/toGLSLOperand.h"
|
||||
#include "internal_includes/Declaration.h"
|
||||
#include "internal_includes/languages.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/HLSLccToolkit.h"
|
||||
#include "internal_includes/UseDefineChains.h"
|
||||
#include "internal_includes/DataTypeAnalysis.h"
|
||||
#include "internal_includes/Shader.h"
|
||||
#include "internal_includes/HLSLCrossCompilerContext.h"
|
||||
#include "internal_includes/Instruction.h"
|
||||
#include "internal_includes/LoopTransform.h"
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
|
||||
// In GLSL, the input and output names cannot clash.
|
||||
// Also, the output name of previous stage must match the input name of the next stage.
|
||||
// So, do gymnastics depending on which shader we're running on and which other shaders exist in this program.
|
||||
//
|
||||
void ToGLSL::SetIOPrefixes()
|
||||
{
|
||||
switch (psContext->psShader->eShaderType)
|
||||
{
|
||||
case VERTEX_SHADER:
|
||||
psContext->inputPrefix = "in_";
|
||||
psContext->outputPrefix = "vs_";
|
||||
break;
|
||||
|
||||
case HULL_SHADER:
|
||||
// Input always coming from vertex shader
|
||||
psContext->inputPrefix = "vs_";
|
||||
psContext->outputPrefix = "hs_";
|
||||
break;
|
||||
|
||||
case DOMAIN_SHADER:
|
||||
// There's no domain shader without hull shader
|
||||
psContext->inputPrefix = "hs_";
|
||||
psContext->outputPrefix = "ds_";
|
||||
break;
|
||||
|
||||
case GEOMETRY_SHADER:
|
||||
// The input depends on whether there's a tessellation shader before us
|
||||
if (psContext->psDependencies && (psContext->psDependencies->ui32ProgramStages & PS_FLAG_DOMAIN_SHADER))
|
||||
psContext->inputPrefix = "ds_";
|
||||
else
|
||||
psContext->inputPrefix = "vs_";
|
||||
|
||||
psContext->outputPrefix = "gs_";
|
||||
break;
|
||||
|
||||
case PIXEL_SHADER:
|
||||
// The inputs can come from geom shader, domain shader or directly from vertex shader
|
||||
if (psContext->psDependencies)
|
||||
{
|
||||
if (psContext->psDependencies->ui32ProgramStages & PS_FLAG_GEOMETRY_SHADER)
|
||||
{
|
||||
psContext->inputPrefix = "gs_";
|
||||
}
|
||||
else if (psContext->psDependencies->ui32ProgramStages & PS_FLAG_DOMAIN_SHADER)
|
||||
{
|
||||
psContext->inputPrefix = "ds_";
|
||||
}
|
||||
else
|
||||
{
|
||||
psContext->inputPrefix = "vs_";
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
psContext->inputPrefix = "vs_";
|
||||
}
|
||||
psContext->outputPrefix = "";
|
||||
break;
|
||||
|
||||
|
||||
case COMPUTE_SHADER:
|
||||
default:
|
||||
// No prefixes
|
||||
psContext->inputPrefix = "";
|
||||
psContext->outputPrefix = "";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Emits everything that depends on the target GLSL version and on the opcodes
// the shader uses:
//   * `#extension` directives are appended to psContext->extensions,
//   * default precision statements and the gl_FragCoord layout redeclaration
//     are appended to the string psContext->currentGLSLString points at.
// The order of the emitted directives matches the order of the checks below.
static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext)
{
    bstring glsl = *psContext->currentGLSLString;
    bstring extensions = psContext->extensions;
    Shader* psShader = psContext->psShader;
    const GLLang lang = psShader->eTargetLanguage;

    const bool isES = (lang >= LANG_ES_100 && lang <= LANG_ES_310);
    const bool isES3x = (lang == LANG_ES_300 || lang == LANG_ES_310);
    const bool isPixelShader = (psShader->eShaderType == PIXEL_SHADER);

    // Set by several checks below; the directive itself is emitted once, after all of them.
    bool GL_ARB_shader_image_load_store = false;

    // True when the shader contains at least one instruction with the given opcode.
    auto used = [psShader](int opcode) -> bool
    {
        return psShader->aiOpcodeUsed[opcode] != 0;
    };

    if (psShader->ui32MajorVersion > 3 && !isES3x && !(lang >= LANG_330))
    {
        bcatcstr(extensions, "#extension GL_ARB_shader_bit_encoding : enable\n");
    }

    if (!HaveCompute(lang))
    {
        if (psShader->eShaderType == COMPUTE_SHADER)
        {
            bcatcstr(extensions, "#extension GL_ARB_compute_shader : enable\n");
        }

        if (used(OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED) ||
            used(OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW) ||
            used(OPCODE_DCL_RESOURCE_STRUCTURED) ||
            used(OPCODE_DCL_RESOURCE_RAW))
        {
            bcatcstr(extensions, "#extension GL_ARB_shader_storage_buffer_object : enable\n");
        }
    }

    if (!HaveAtomicMem(lang) || !HaveAtomicCounter(lang))
    {
        if (used(OPCODE_IMM_ATOMIC_ALLOC) ||
            used(OPCODE_IMM_ATOMIC_CONSUME) ||
            used(OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED))
        {
            bcatcstr(extensions, "#extension GL_ARB_shader_atomic_counters : enable\n");
        }
    }

    if (!HaveImageAtomics(lang))
    {
        // NOTE(review): OPCODE_ATOMIC_IMAX / OPCODE_ATOMIC_UMAX are not part of this
        // list in the original code - confirm whether that omission is intentional.
        const bool usesAtomics =
            used(OPCODE_ATOMIC_CMP_STORE) ||
            used(OPCODE_IMM_ATOMIC_AND) ||
            used(OPCODE_ATOMIC_AND) ||
            used(OPCODE_IMM_ATOMIC_IADD) ||
            used(OPCODE_ATOMIC_IADD) ||
            used(OPCODE_ATOMIC_OR) ||
            used(OPCODE_ATOMIC_XOR) ||
            used(OPCODE_ATOMIC_IMIN) ||
            used(OPCODE_ATOMIC_UMIN) ||
            used(OPCODE_IMM_ATOMIC_IMAX) ||
            used(OPCODE_IMM_ATOMIC_IMIN) ||
            used(OPCODE_IMM_ATOMIC_UMAX) ||
            used(OPCODE_IMM_ATOMIC_UMIN) ||
            used(OPCODE_IMM_ATOMIC_OR) ||
            used(OPCODE_IMM_ATOMIC_XOR) ||
            used(OPCODE_IMM_ATOMIC_EXCH) ||
            used(OPCODE_IMM_ATOMIC_CMP_EXCH);

        if (usesAtomics)
        {
            if (isES)
                bcatcstr(extensions, "#extension GL_OES_shader_image_atomic : enable\n");
            else
                GL_ARB_shader_image_load_store = true;
        }
    }

    if (!HaveGather(lang))
    {
        if (used(OPCODE_GATHER4) ||
            used(OPCODE_GATHER4_PO_C) ||
            used(OPCODE_GATHER4_PO) ||
            used(OPCODE_GATHER4_C))
        {
            bcatcstr(extensions, "#extension GL_ARB_texture_gather : enable\n");
        }
    }

    if (!HaveGatherNonConstOffset(lang))
    {
        if (used(OPCODE_GATHER4_PO_C) || used(OPCODE_GATHER4_PO))
        {
            bcatcstr(extensions, "#extension GL_ARB_gpu_shader5 : enable\n");
        }
    }

    if (!HaveQueryLod(lang))
    {
        if (used(OPCODE_LOD))
        {
            bcatcstr(extensions, "#extension GL_ARB_texture_query_lod : enable\n");
        }
    }

    if (!HaveQueryLevels(lang))
    {
        if (used(OPCODE_RESINFO))
        {
            bcatcstr(extensions, "#extension GL_ARB_texture_query_levels : enable\n");
        }
    }

    if (!HaveImageLoadStore(lang))
    {
        if (used(OPCODE_STORE_UAV_TYPED) ||
            used(OPCODE_STORE_RAW) ||
            used(OPCODE_STORE_STRUCTURED))
        {
            GL_ARB_shader_image_load_store = true;
            bcatcstr(extensions, "#extension GL_ARB_shader_bit_encoding : enable\n");
        }
        else if (used(OPCODE_LD_UAV_TYPED) ||
                 used(OPCODE_LD_RAW) ||
                 used(OPCODE_LD_STRUCTURED))
        {
            GL_ARB_shader_image_load_store = true;
        }
    }

    if (!HaveGeometryShaderARB(lang))
    {
        if (psShader->eShaderType == GEOMETRY_SHADER)
        {
            bcatcstr(extensions, "#extension GL_ARB_geometry_shader : enable\n");
        }
    }

    if (isES3x)
    {
        if (psShader->eShaderType == GEOMETRY_SHADER)
        {
            bcatcstr(extensions, "#extension GL_OES_geometry_shader : enable\n");
            bcatcstr(extensions, "#extension GL_EXT_geometry_shader : enable\n");
        }

        if (psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER)
        {
            bcatcstr(extensions, "#extension GL_OES_tessellation_shader : enable\n");
            bcatcstr(extensions, "#extension GL_EXT_tessellation_shader : enable\n");
        }
    }

    if (GL_ARB_shader_image_load_store)
        bcatcstr(extensions, "#extension GL_ARB_shader_image_load_store : enable\n");

    // Handle fragment shader default precision
    if (isPixelShader && (lang == LANG_ES_100 || isES3x))
    {
        // Float default precision is patched during runtime in GlslGpuProgramGLES.cpp:PatchupFragmentShaderText()
        // Except on Vulkan
        if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS)
            bcatcstr(glsl, "precision highp float;\n");

        // Define default int precision to highp to avoid issues on platforms that actually implement mediump
        bcatcstr(glsl, "precision highp int;\n");
    }

    if (isPixelShader && lang >= LANG_120 && !HaveFragmentCoordConventions(lang))
    {
        bcatcstr(extensions, "#extension GL_ARB_fragment_coord_conventions : require\n");
    }

    if (isPixelShader && lang >= LANG_150)
    {
        const bool originUpperLeft = (psContext->flags & HLSLCC_FLAG_ORIGIN_UPPER_LEFT) != 0;
        const bool pixelCenterInteger = (psContext->flags & HLSLCC_FLAG_PIXEL_CENTER_INTEGER) != 0;

        // GLSL requires every redeclaration of gl_FragCoord to use the same set of
        // qualifiers, so when both conventions are requested they have to go into a
        // single declaration instead of two separate ones.
        if (originUpperLeft && pixelCenterInteger)
            bcatcstr(glsl, "layout(origin_upper_left, pixel_center_integer) in vec4 gl_FragCoord;\n");
        else if (originUpperLeft)
            bcatcstr(glsl, "layout(origin_upper_left) in vec4 gl_FragCoord;\n");
        else if (pixelCenterInteger)
            bcatcstr(glsl, "layout(pixel_center_integer) in vec4 gl_FragCoord;\n");
    }

    /*
        OpenGL 4.1 API spec:
        To use any built-in input or output in the gl_PerVertex block in separable
        program objects, shader code must redeclare that block prior to use.
    */
    /* DISABLED FOR NOW */
    /*	if(psContext->psShader->eShaderType == VERTEX_SHADER && psContext->psShader->eTargetLanguage >= LANG_410)
        {
            bcatcstr(glsl, "out gl_PerVertex {\n");
            bcatcstr(glsl, "vec4 gl_Position;\n");
            bcatcstr(glsl, "float gl_PointSize;\n");
            bcatcstr(glsl, "float gl_ClipDistance[];");
            bcatcstr(glsl, "};\n");
        }*/
}
|
||||
|
||||
// Picks the default GLSL version for a shader from the HLSL shader model
// major version found in its bytecode:
//   SM5 -> GLSL 430, SM4 -> GLSL 330, anything else -> GLSL 120.
GLLang ChooseLanguage(Shader* psShader)
{
    const auto shaderModel = psShader->ui32MajorVersion;

    if (shaderModel == 5)
        return LANG_430;

    if (shaderModel == 4)
        return LANG_330;

    return LANG_120;
}
|
||||
|
||||
// Returns the `#version` directive (newline included) for the given target
// language, or an empty string for languages that have no entry here.
const char* GetVersionString(GLLang language)
{
    switch (language)
    {
        // OpenGL ES
        case LANG_ES_100: return "#version 100\n";
        case LANG_ES_300: return "#version 300 es\n";
        case LANG_ES_310: return "#version 310 es\n";

        // Desktop OpenGL
        case LANG_120:    return "#version 120\n";
        case LANG_130:    return "#version 130\n";
        case LANG_140:    return "#version 140\n";
        case LANG_150:    return "#version 150\n";
        case LANG_330:    return "#version 330\n";
        case LANG_400:    return "#version 400\n";
        case LANG_410:    return "#version 410\n";
        case LANG_420:    return "#version 420\n";
        case LANG_430:    return "#version 430\n";
        case LANG_440:    return "#version 440\n";

        default:          return "";
    }
}
|
||||
|
||||
// Maps a shader phase to the base name of the GLSL function generated for it.
// MAIN_PHASE and any unlisted value map to the empty string.
static const char * GetPhaseFuncName(SHADER_PHASE_TYPE eType)
{
    if (eType == HS_GLOBAL_DECL_PHASE)
        return "hs_global_decls";
    if (eType == HS_FORK_PHASE)
        return "fork_phase";
    if (eType == HS_CTRL_POINT_PHASE)
        return "control_point_phase";
    if (eType == HS_JOIN_PHASE)
        return "join_phase";

    // MAIN_PHASE and everything else
    return "";
}
|
||||
|
||||
// Generates the code needed when a hull shader has no control point phase:
//   1. an `in`/`out` array declaration pair for every input signature element
//      (except position, which travels through gl_in/gl_out), and
//   2. a `passthrough_ctrl_points()` function that copies each input to the
//      matching output for the current invocation.
// Everything is appended to psContext->glsl.
static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext)
{
    bstring glsl = psContext->glsl;
    auto &signatures = psContext->psShader->sInfo.psInputSignatures;

    // Position element 0 is forwarded through the built-in gl_Position instead
    // of a user-declared varying.
    auto isBuiltinPosition = [](const ShaderInfo::InOutSignature &sig) -> bool
    {
        return (sig.eSystemValueType == NAME_POSITION || sig.semanticName == "POS") && sig.ui32SemanticIndex == 0;
    };

    // The precision qualifier only depends on the target language, not on the signature.
    const char *prec = HavePrecisionQualifers(psContext->psShader->eTargetLanguage) ? "highp " : "";

    for (size_t idx = 0; idx < signatures.size(); idx++)
    {
        const ShaderInfo::InOutSignature &sig = signatures[idx];
        if (isBuiltinPosition(sig))
            continue;

        const uint32_t ui32NumComponents = HLSLcc::GetNumberBitsSet(sig.ui32Mask);
        const bool isVector = ui32NumComponents > 1;

        // Base type name; vectors get the component count appended when printed.
        const char *Type;
        switch (sig.eComponentType)
        {
            case INOUT_COMPONENT_SINT32:
                Type = isVector ? "ivec" : "int";
                break;
            case INOUT_COMPONENT_UINT32:
                Type = isVector ? "uvec" : "uint";
                break;
            case INOUT_COMPONENT_FLOAT32:
            default:
                Type = isVector ? "vec" : "float";
                break;
        }

        const char *semantic = sig.semanticName.c_str();

        psContext->AddIndentation();
        if (isVector) // TODO Precision
            bformata(glsl, "in %s%s%d %s%s%d[];\n", prec, Type, ui32NumComponents, psContext->inputPrefix, semantic, sig.ui32SemanticIndex);
        else
            bformata(glsl, "in %s%s %s%s%d[];\n", prec, Type, psContext->inputPrefix, semantic, sig.ui32SemanticIndex);

        psContext->AddIndentation();
        if (isVector) // TODO Precision
            bformata(glsl, "out %s%s%d %s%s%d[];\n", prec, Type, ui32NumComponents, psContext->outputPrefix, semantic, sig.ui32SemanticIndex);
        else
            bformata(glsl, "out %s%s %s%s%d[];\n", prec, Type, psContext->outputPrefix, semantic, sig.ui32SemanticIndex);
    }

    psContext->AddIndentation();
    bcatcstr(glsl, "void passthrough_ctrl_points()\n");
    psContext->AddIndentation();
    bcatcstr(glsl, "{\n");
    psContext->indent++;

    for (size_t idx = 0; idx < signatures.size(); idx++)
    {
        const ShaderInfo::InOutSignature &sig = signatures[idx];

        psContext->AddIndentation();

        if (isBuiltinPosition(sig))
        {
            bcatcstr(glsl, "gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n");
        }
        else
        {
            const char *semantic = sig.semanticName.c_str();
            bformata(glsl, "%s%s%d[gl_InvocationID] = %s%s%d[gl_InvocationID];\n",
                     psContext->outputPrefix, semantic, sig.ui32SemanticIndex,
                     psContext->inputPrefix, semantic, sig.ui32SemanticIndex);
        }
    }

    psContext->indent--;
    psContext->AddIndentation();
    bcatcstr(glsl, "}\n");
}
|
||||
|
||||
// Stores the target language for this translator. LANG_DEFAULT is resolved
// through ChooseLanguage() based on the shader being translated.
// Returns the language that ended up being selected.
GLLang ToGLSL::SetLanguage(GLLang suggestedLanguage)
{
    language = (suggestedLanguage == LANG_DEFAULT)
        ? ChooseLanguage(psContext->psShader)
        : suggestedLanguage;
    return language;
}
|
||||
|
||||
bool ToGLSL::Translate()
|
||||
{
|
||||
bstring glsl;
|
||||
uint32_t i;
|
||||
Shader* psShader = psContext->psShader;
|
||||
uint32_t ui32Phase;
|
||||
|
||||
psContext->psTranslator = this;
|
||||
|
||||
if (language == LANG_DEFAULT)
|
||||
SetLanguage(LANG_DEFAULT);
|
||||
|
||||
SetIOPrefixes();
|
||||
psShader->ExpandSWAPCs();
|
||||
psShader->ForcePositionToHighp();
|
||||
psShader->AnalyzeIOOverlap();
|
||||
psShader->FindUnusedGlobals(psContext->flags);
|
||||
|
||||
psContext->indent = 0;
|
||||
|
||||
glsl = bfromcstralloc (1024 * 10, "\n");
|
||||
bstring extensions = bfromcstralloc (1024 * 10, GetVersionString(language));
|
||||
psContext->extensions = extensions;
|
||||
|
||||
psContext->glsl = glsl;
|
||||
for(i=0; i<psShader->asPhases.size();++i)
|
||||
{
|
||||
psShader->asPhases[i].postShaderCode = bfromcstralloc (1024 * 5, "");
|
||||
psShader->asPhases[i].earlyMain = bfromcstralloc(1024 * 5, "");
|
||||
}
|
||||
psContext->currentGLSLString = &glsl;
|
||||
psShader->eTargetLanguage = language;
|
||||
psContext->currentPhase = MAIN_PHASE;
|
||||
|
||||
if (psShader->extensions)
|
||||
{
|
||||
if (psShader->extensions->ARB_explicit_attrib_location)
|
||||
bcatcstr(extensions, "#extension GL_ARB_explicit_attrib_location : require\n");
|
||||
if (psShader->extensions->ARB_explicit_uniform_location)
|
||||
bcatcstr(extensions, "#extension GL_ARB_explicit_uniform_location : require\n");
|
||||
if (psShader->extensions->ARB_shading_language_420pack)
|
||||
bcatcstr(extensions, "#extension GL_ARB_shading_language_420pack : require\n");
|
||||
}
|
||||
|
||||
psContext->ClearDependencyData();
|
||||
|
||||
AddVersionDependentCode(psContext);
|
||||
|
||||
psShader->PrepareStructuredBufferBindingSlots();
|
||||
|
||||
for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++)
|
||||
{
|
||||
ShaderPhase &phase = psShader->asPhases[ui32Phase];
|
||||
phase.UnvectorizeImmMoves();
|
||||
psContext->DoDataTypeAnalysis(&phase);
|
||||
phase.ResolveUAVProperties();
|
||||
psShader->ResolveStructuredBufferBindingSlots(&phase);
|
||||
phase.PruneConstArrays();
|
||||
}
|
||||
|
||||
psShader->PruneTempRegisters();
|
||||
|
||||
for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++)
|
||||
{
|
||||
// Loop transform can only be done after the temps have been pruned
|
||||
ShaderPhase &phase = psShader->asPhases[ui32Phase];
|
||||
HLSLcc::DoLoopTransform(phase);
|
||||
}
|
||||
|
||||
//Special case. Can have multiple phases.
|
||||
if(psShader->eShaderType == HULL_SHADER)
|
||||
{
|
||||
const SHADER_PHASE_TYPE ePhaseFuncCallOrder[3] = { HS_CTRL_POINT_PHASE, HS_FORK_PHASE, HS_JOIN_PHASE };
|
||||
uint32_t ui32PhaseCallIndex;
|
||||
int perPatchSectionAdded = 0;
|
||||
int hasControlPointPhase = 0;
|
||||
|
||||
psShader->ConsolidateHullTempVars();
|
||||
|
||||
// Find out if we have a passthrough hull shader
|
||||
for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++)
|
||||
{
|
||||
if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE)
|
||||
hasControlPointPhase = 1;
|
||||
}
|
||||
|
||||
// Phase 1 is always the global decls phase, no instructions
|
||||
for(i=0; i < psShader->asPhases[1].psDecl.size(); ++i)
|
||||
{
|
||||
TranslateDeclaration(&psShader->asPhases[1].psDecl[i]);
|
||||
}
|
||||
|
||||
if (hasControlPointPhase == 0)
|
||||
{
|
||||
DoHullShaderPassthrough(psContext);
|
||||
}
|
||||
|
||||
for(ui32Phase=2; ui32Phase<psShader->asPhases.size(); ui32Phase++)
|
||||
{
|
||||
ShaderPhase *psPhase = &psShader->asPhases[ui32Phase];
|
||||
psContext->currentPhase = ui32Phase;
|
||||
|
||||
#ifdef _DEBUG
|
||||
bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase));
|
||||
#endif
|
||||
for (i = 0; i < psPhase->psDecl.size(); ++i)
|
||||
{
|
||||
TranslateDeclaration(&psPhase->psDecl[i]);
|
||||
}
|
||||
|
||||
bformata(glsl, "void %s%d(int phaseInstanceID)\n{\n", GetPhaseFuncName(psPhase->ePhase), ui32Phase);
|
||||
psContext->indent++;
|
||||
|
||||
if (psPhase->psInst.size() > 0)
|
||||
{
|
||||
//The minus one here is remove the return statement at end of phases.
|
||||
//We don't want to translate that, we'll just end the function body.
|
||||
ASSERT(psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_RET);
|
||||
for (i = 0; i < psPhase->psInst.size() - 1; ++i)
|
||||
{
|
||||
TranslateInstruction(&psPhase->psInst[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
psContext->indent--;
|
||||
bcatcstr(glsl, "}\n");
|
||||
}
|
||||
|
||||
bcatcstr(glsl, "void main()\n{\n");
|
||||
|
||||
psContext->indent++;
|
||||
|
||||
// There are cases when there are no control point phases and we have to do passthrough
|
||||
if (hasControlPointPhase == 0)
|
||||
{
|
||||
// Passthrough control point phase, run the rest only once per patch
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(glsl, "passthrough_ctrl_points();\n");
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(glsl, "barrier();\n");
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(glsl, "if (gl_InvocationID == 0)\n");
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(glsl, "{\n");
|
||||
psContext->indent++;
|
||||
perPatchSectionAdded = 1;
|
||||
}
|
||||
|
||||
for(ui32PhaseCallIndex=0; ui32PhaseCallIndex<3; ui32PhaseCallIndex++)
|
||||
{
|
||||
for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++)
|
||||
{
|
||||
uint32_t i;
|
||||
ShaderPhase *psPhase = &psShader->asPhases[ui32Phase];
|
||||
if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex])
|
||||
continue;
|
||||
|
||||
if (psPhase->earlyMain->slen > 1)
|
||||
{
|
||||
#ifdef _DEBUG
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(glsl, "//--- Start Early Main ---\n");
|
||||
#endif
|
||||
bconcat(glsl, psPhase->earlyMain);
|
||||
#ifdef _DEBUG
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(glsl, "//--- End Early Main ---\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
for (i = 0; i < psPhase->ui32InstanceCount; i++)
|
||||
{
|
||||
|
||||
psContext->AddIndentation();
|
||||
bformata(glsl, "%s%d(%d);\n", GetPhaseFuncName(psShader->asPhases[ui32Phase].ePhase), ui32Phase, i);
|
||||
}
|
||||
|
||||
if (psPhase->hasPostShaderCode)
|
||||
{
|
||||
#ifdef _DEBUG
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(glsl, "//--- Post shader code ---\n");
|
||||
#endif
|
||||
bconcat(glsl, psPhase->postShaderCode);
|
||||
#ifdef _DEBUG
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(glsl, "//--- End post shader code ---\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE)
|
||||
{
|
||||
// We're done printing control point phase, run the rest only once per patch
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(glsl, "barrier();\n");
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(glsl, "if (gl_InvocationID == 0)\n");
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(glsl, "{\n");
|
||||
psContext->indent++;
|
||||
perPatchSectionAdded = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (perPatchSectionAdded != 0)
|
||||
{
|
||||
psContext->indent--;
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(glsl, "}\n");
|
||||
}
|
||||
|
||||
psContext->indent--;
|
||||
|
||||
bcatcstr(glsl, "}\n");
|
||||
|
||||
// Concat extensions and glsl for the final shader code.
|
||||
bconcat(extensions, glsl);
|
||||
bdestroy(glsl);
|
||||
psContext->glsl = extensions;
|
||||
glsl = NULL;
|
||||
|
||||
if(psContext->psDependencies)
|
||||
{
|
||||
//Save partitioning and primitive type for use by domain shader.
|
||||
psContext->psDependencies->eTessOutPrim = psShader->sInfo.eTessOutPrim;
|
||||
|
||||
psContext->psDependencies->eTessPartitioning = psShader->sInfo.eTessPartitioning;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
if(psShader->eShaderType == DOMAIN_SHADER && psContext->psDependencies)
|
||||
{
|
||||
//Load partitioning and primitive type from hull shader.
|
||||
switch(psContext->psDependencies->eTessOutPrim)
|
||||
{
|
||||
case TESSELLATOR_OUTPUT_TRIANGLE_CCW:
|
||||
{
|
||||
bcatcstr(glsl, "layout(ccw) in;\n");
|
||||
break;
|
||||
}
|
||||
case TESSELLATOR_OUTPUT_TRIANGLE_CW:
|
||||
{
|
||||
bcatcstr(glsl, "layout(cw) in;\n");
|
||||
break;
|
||||
}
|
||||
case TESSELLATOR_OUTPUT_POINT:
|
||||
{
|
||||
bcatcstr(glsl, "layout(point_mode) in;\n");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
switch(psContext->psDependencies->eTessPartitioning)
|
||||
{
|
||||
case TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
|
||||
{
|
||||
bcatcstr(glsl, "layout(fractional_odd_spacing) in;\n");
|
||||
break;
|
||||
}
|
||||
case TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
|
||||
{
|
||||
bcatcstr(glsl, "layout(fractional_even_spacing) in;\n");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i)
|
||||
{
|
||||
TranslateDeclaration(&psShader->asPhases[0].psDecl[i]);
|
||||
}
|
||||
|
||||
bcatcstr(glsl, "void main()\n{\n");
|
||||
|
||||
psContext->indent++;
|
||||
|
||||
if (psContext->psShader->asPhases[0].earlyMain->slen > 1)
|
||||
{
|
||||
#ifdef _DEBUG
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(glsl, "//--- Start Early Main ---\n");
|
||||
#endif
|
||||
bconcat(glsl, psContext->psShader->asPhases[0].earlyMain);
|
||||
#ifdef _DEBUG
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(glsl, "//--- End Early Main ---\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
for(i=0; i < psShader->asPhases[0].psInst.size(); ++i)
|
||||
{
|
||||
TranslateInstruction(&psShader->asPhases[0].psInst[i]);
|
||||
}
|
||||
|
||||
psContext->indent--;
|
||||
|
||||
bcatcstr(glsl, "}\n");
|
||||
|
||||
// Concat extensions and glsl for the final shader code.
|
||||
bconcat(extensions, glsl);
|
||||
bdestroy(glsl);
|
||||
psContext->glsl = extensions;
|
||||
glsl = NULL;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
2994
src/toGLSLDeclaration.cpp
Normal file
2994
src/toGLSLDeclaration.cpp
Normal file
File diff suppressed because it is too large
Load Diff
4127
src/toGLSLInstruction.cpp
Normal file
4127
src/toGLSLInstruction.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1616
src/toGLSLOperand.cpp
Normal file
1616
src/toGLSLOperand.cpp
Normal file
File diff suppressed because it is too large
Load Diff
265
src/toMetal.cpp
Normal file
265
src/toMetal.cpp
Normal file
@ -0,0 +1,265 @@
|
||||
|
||||
#include "internal_includes/toMetal.h"
|
||||
#include "internal_includes/HLSLCrossCompilerContext.h"
|
||||
#include "internal_includes/Shader.h"
|
||||
#include "internal_includes/debug.h"
|
||||
|
||||
#include "internal_includes/Declaration.h"
|
||||
#include "internal_includes/toGLSL.h"
|
||||
#include "internal_includes/LoopTransform.h"
|
||||
#include "internal_includes/HLSLccToolkit.h"
|
||||
#include <algorithm>
|
||||
|
||||
// Prints the definition of struct `sname` to `glsl`, after first printing every
// struct it depends on. A struct is printed at most once; the m_IsPrinted flag
// is set before recursing into the dependencies.
static void PrintStructDeclaration(HLSLCrossCompilerContext *psContext, bstring glsl, std::string &sname, StructDefinitions &defs)
{
    StructDefinition &def = defs[sname];
    if (def.m_IsPrinted)
        return;
    def.m_IsPrinted = true;

    // Dependencies have to be declared ahead of the struct that uses them.
    for (std::string &dependency : def.m_Dependencies)
        PrintStructDeclaration(psContext, glsl, dependency, defs);

    bformata(glsl, "struct %s\n{\n", sname.c_str());
    psContext->indent++;

    for (std::string &member : def.m_Members)
    {
        psContext->AddIndentation();
        bcatcstr(glsl, member.c_str());
        bcatcstr(glsl, ";\n");
    }

    psContext->indent--;
    bcatcstr(glsl, "};\n\n");
}
|
||||
|
||||
// Prints every struct listed as a dependency of the "" entry of `defs`
// (together with their own dependencies) to the current output string.
void ToMetal::PrintStructDeclarations(StructDefinitions &defs)
{
    bstring out = *psContext->currentGLSLString;
    StructDefinition &root = defs[""];

    for (std::string &structName : root.m_Dependencies)
        PrintStructDeclaration(psContext, out, structName, defs);
}
|
||||
|
||||
bool ToMetal::Translate()
|
||||
{
|
||||
bstring glsl;
|
||||
uint32_t i;
|
||||
Shader* psShader = psContext->psShader;
|
||||
psContext->psTranslator = this;
|
||||
|
||||
SetIOPrefixes();
|
||||
psShader->ExpandSWAPCs();
|
||||
psShader->ForcePositionToHighp();
|
||||
psShader->AnalyzeIOOverlap();
|
||||
psShader->FindUnusedGlobals(psContext->flags);
|
||||
|
||||
psContext->indent = 0;
|
||||
|
||||
glsl = bfromcstralloc(1024 * 10, "");
|
||||
bstring bodyglsl = bfromcstralloc(1024 * 10, "");
|
||||
|
||||
psContext->glsl = glsl;
|
||||
for (i = 0; i < psShader->asPhases.size(); ++i)
|
||||
{
|
||||
psShader->asPhases[i].postShaderCode = bfromcstralloc(1024 * 5, "");
|
||||
psShader->asPhases[i].earlyMain = bfromcstralloc(1024 * 5, "");
|
||||
}
|
||||
|
||||
psContext->currentGLSLString = &glsl;
|
||||
psShader->eTargetLanguage = LANG_METAL;
|
||||
psShader->extensions = NULL;
|
||||
psContext->currentPhase = MAIN_PHASE;
|
||||
|
||||
psContext->ClearDependencyData();
|
||||
|
||||
ClampPartialPrecisions();
|
||||
|
||||
psShader->PrepareStructuredBufferBindingSlots();
|
||||
|
||||
ShaderPhase &phase = psShader->asPhases[0];
|
||||
phase.UnvectorizeImmMoves();
|
||||
psContext->DoDataTypeAnalysis(&phase);
|
||||
phase.ResolveUAVProperties();
|
||||
psShader->ResolveStructuredBufferBindingSlots(&phase);
|
||||
phase.PruneConstArrays();
|
||||
HLSLcc::DoLoopTransform(phase);
|
||||
|
||||
psShader->PruneTempRegisters();
|
||||
|
||||
bcatcstr(glsl, "#include <metal_stdlib>\n#include <metal_texture>\nusing namespace metal;\n");
|
||||
|
||||
|
||||
for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i)
|
||||
{
|
||||
TranslateDeclaration(&psShader->asPhases[0].psDecl[i]);
|
||||
}
|
||||
|
||||
if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0)
|
||||
{
|
||||
m_StructDefinitions[""].m_Members.push_back(GetInputStructName() + " input [[ stage_in ]]");
|
||||
m_StructDefinitions[""].m_Dependencies.push_back(GetInputStructName());
|
||||
}
|
||||
|
||||
if (psShader->eShaderType != COMPUTE_SHADER)
|
||||
{
|
||||
if (m_StructDefinitions[GetOutputStructName()].m_Members.size() > 0)
|
||||
{
|
||||
m_StructDefinitions[""].m_Dependencies.push_back(GetOutputStructName());
|
||||
}
|
||||
}
|
||||
|
||||
PrintStructDeclarations(m_StructDefinitions);
|
||||
|
||||
psContext->currentGLSLString = &bodyglsl;
|
||||
|
||||
switch (psShader->eShaderType)
|
||||
{
|
||||
case VERTEX_SHADER:
|
||||
bcatcstr(bodyglsl, "vertex Mtl_VertexOut xlatMtlMain(\n");
|
||||
break;
|
||||
case PIXEL_SHADER:
|
||||
bcatcstr(bodyglsl, "fragment Mtl_FragmentOut xlatMtlMain(\n");
|
||||
break;
|
||||
case COMPUTE_SHADER:
|
||||
bcatcstr(bodyglsl, "kernel void computeMain(\n");
|
||||
break;
|
||||
default:
|
||||
// Not supported
|
||||
ASSERT(0);
|
||||
return false;
|
||||
}
|
||||
psContext->indent++;
|
||||
for (auto itr = m_StructDefinitions[""].m_Members.begin(); itr != m_StructDefinitions[""].m_Members.end(); itr++)
|
||||
{
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, itr->c_str());
|
||||
if (itr + 1 != m_StructDefinitions[""].m_Members.end())
|
||||
bcatcstr(bodyglsl, ",\n");
|
||||
}
|
||||
|
||||
bcatcstr(bodyglsl, ")\n{\n");
|
||||
if (psShader->eShaderType != COMPUTE_SHADER)
|
||||
{
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, GetOutputStructName().c_str());
|
||||
bcatcstr(bodyglsl, " output;\n");
|
||||
}
|
||||
|
||||
if (psContext->psShader->asPhases[0].earlyMain->slen > 1)
|
||||
{
|
||||
#ifdef _DEBUG
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "//--- Start Early Main ---\n");
|
||||
#endif
|
||||
bconcat(bodyglsl, psContext->psShader->asPhases[0].earlyMain);
|
||||
#ifdef _DEBUG
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "//--- End Early Main ---\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i)
|
||||
{
|
||||
TranslateInstruction(&psShader->asPhases[0].psInst[i]);
|
||||
}
|
||||
|
||||
psContext->indent--;
|
||||
|
||||
bcatcstr(bodyglsl, "}\n");
|
||||
|
||||
psContext->currentGLSLString = &glsl;
|
||||
|
||||
bcatcstr(glsl, m_ExtraGlobalDefinitions.c_str());
|
||||
|
||||
// Print out extra functions we generated
|
||||
std::for_each(m_FunctionDefinitions.begin(), m_FunctionDefinitions.end(), [&glsl](const FunctionDefinitions::value_type &p)
|
||||
{
|
||||
bcatcstr(glsl, p.second.c_str());
|
||||
bcatcstr(glsl, "\n");
|
||||
});
|
||||
|
||||
// And then the actual function body
|
||||
bconcat(glsl, bodyglsl);
|
||||
bdestroy(bodyglsl);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void ToMetal::DeclareExtraFunction(const std::string &name, const std::string &body)
|
||||
{
|
||||
if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end())
|
||||
return;
|
||||
m_FunctionDefinitions.insert(std::make_pair(name, body));
|
||||
}
|
||||
|
||||
|
||||
std::string ToMetal::GetOutputStructName() const
|
||||
{
|
||||
switch(psContext->psShader->eShaderType)
|
||||
{
|
||||
case VERTEX_SHADER:
|
||||
return "Mtl_VertexOut";
|
||||
case PIXEL_SHADER:
|
||||
return "Mtl_FragmentOut";
|
||||
default:
|
||||
ASSERT(0);
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
std::string ToMetal::GetInputStructName() const
|
||||
{
|
||||
switch (psContext->psShader->eShaderType)
|
||||
{
|
||||
case VERTEX_SHADER:
|
||||
return "Mtl_VertexIn";
|
||||
case PIXEL_SHADER:
|
||||
return "Mtl_FragmentIn";
|
||||
case COMPUTE_SHADER:
|
||||
return "Mtl_KernelIn";
|
||||
default:
|
||||
ASSERT(0);
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
void ToMetal::SetIOPrefixes()
|
||||
{
|
||||
switch (psContext->psShader->eShaderType)
|
||||
{
|
||||
case VERTEX_SHADER:
|
||||
psContext->inputPrefix = "input.";
|
||||
psContext->outputPrefix = "output.";
|
||||
break;
|
||||
|
||||
case PIXEL_SHADER:
|
||||
psContext->inputPrefix = "input.";
|
||||
psContext->outputPrefix = "output.";
|
||||
break;
|
||||
|
||||
case COMPUTE_SHADER:
|
||||
psContext->inputPrefix = "";
|
||||
psContext->outputPrefix = "";
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void ToMetal::ClampPartialPrecisions()
|
||||
{
|
||||
HLSLcc::ForEachOperand(psContext->psShader->asPhases[0].psInst.begin(), psContext->psShader->asPhases[0].psInst.end(), FEO_FLAG_ALL,
|
||||
[](std::vector<Instruction>::iterator &i, Operand *o, uint32_t flags)
|
||||
{
|
||||
if (o->eMinPrecision == OPERAND_MIN_PRECISION_FLOAT_2_8)
|
||||
o->eMinPrecision = OPERAND_MIN_PRECISION_FLOAT_16;
|
||||
});
|
||||
}
|
1979
src/toMetalDeclaration.cpp
Normal file
1979
src/toMetalDeclaration.cpp
Normal file
File diff suppressed because it is too large
Load Diff
3731
src/toMetalInstruction.cpp
Normal file
3731
src/toMetalInstruction.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1120
src/toMetalOperand.cpp
Normal file
1120
src/toMetalOperand.cpp
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user