Initial release (cfe8342494bbc2)

This commit is contained in:
Mikko Strandborg 2016-11-16 09:35:08 +02:00
parent 07a739239e
commit eea476093c
62 changed files with 37955 additions and 0 deletions

View File

@ -1,2 +1,50 @@
# HLSLcc
DirectX shader bytecode cross compiler
Originally based on https://github.com/James-Jones/HLSLCrossCompiler.
This library takes DirectX bytecode as input, and translates it into the following languages:
- GLSL (OpenGL 3.2 and later)
- GLSL ES (OpenGL ES 3.0 and later)
- GLSL ES for Vulkan consumption
- Metal Shading Language
This library is used to generate all shaders in Unity for OpenGL, OpenGL ES 3.0+, Metal and Vulkan.
Changes from original HLSLCrossCompiler:
- Codebase changed to C++11, with major code reorganizations.
- Support for multiple language output backends (currently ToGLSL and ToMetal)
- Metal language output support
- Temp register type analysis: In DX bytecode the registers are typeless 32-bit 4-vectors. We do code analysis to infer the actual data types (to prevent the need for tons of bitcasts).
- Loop transformation: Detect constructs that look like for-loops and transform them back to their original form
- Support for partial precision variables in HLSL (min16float etc). Do extra analysis pass to infer the intended precision of samplers.
- Reflection interface to retrieve the shader inputs and their types.
- Lots of workarounds for various driver/shader compiler bugs.
- Lots of minor fixes and improvements for correctness
- Lots of Unity-specific tweaks to allow extending HLSL without having to change the D3D compiler itself.
## Note
This project does not include build files or a test suite, as they are integrated into the Unity build systems. However, building this library should be fairly straightforward: just compile src/*.cpp (in C++11 mode!) and src/cbstring/*.c with the following include paths:
- include
- src/internal_includes
- src/cbstring
- src
The main entry point is the TranslateHLSLFromMem() function in HLSLcc.cpp (taking DX bytecode as input).
## Contributors
- Mikko Strandborg
- Juho Oravainen
- David Rogers
- Marton Ekler
- Antti Tapaninen
- Florian Penzkofer
- Alexey Orlov
- Povilas Kanapickas
## License
See license.txt.

493
include/ShaderInfo.h Normal file
View File

@ -0,0 +1,493 @@
#pragma once
#include <vector>
#include <set>
#include <map>
#include <string>
#include "growing_array.h"
#include <stdint.h>
//Reflection
#define MAX_RESOURCE_BINDINGS 256
// Data type of a shader variable or of the data read from a resource.
// Every enumerator carries an explicit numeric value (presumably matching the
// D3D shader-variable-type numbering - TODO confirm). The values in the 150s are
// internal markers that, per the comments below, are only used during type analysis.
// ResourceBinding::GetDataType() returns values of this enum.
typedef enum _SHADER_VARIABLE_TYPE {
SVT_VOID = 0,
SVT_BOOL = 1,
SVT_INT = 2,
SVT_FLOAT = 3,
SVT_STRING = 4,
SVT_TEXTURE = 5,
SVT_TEXTURE1D = 6,
SVT_TEXTURE2D = 7,
SVT_TEXTURE3D = 8,
SVT_TEXTURECUBE = 9,
SVT_SAMPLER = 10,
SVT_PIXELSHADER = 15,
SVT_VERTEXSHADER = 16,
SVT_UINT = 19,
SVT_UINT8 = 20,
SVT_GEOMETRYSHADER = 21,
SVT_RASTERIZER = 22,
SVT_DEPTHSTENCIL = 23,
SVT_BLEND = 24,
SVT_BUFFER = 25,
SVT_CBUFFER = 26,
SVT_TBUFFER = 27,
SVT_TEXTURE1DARRAY = 28,
SVT_TEXTURE2DARRAY = 29,
SVT_RENDERTARGETVIEW = 30,
SVT_DEPTHSTENCILVIEW = 31,
SVT_TEXTURE2DMS = 32,
SVT_TEXTURE2DMSARRAY = 33,
SVT_TEXTURECUBEARRAY = 34,
SVT_HULLSHADER = 35,
SVT_DOMAINSHADER = 36,
SVT_INTERFACE_POINTER = 37,
SVT_COMPUTESHADER = 38,
SVT_DOUBLE = 39,
SVT_RWTEXTURE1D = 40,
SVT_RWTEXTURE1DARRAY = 41,
SVT_RWTEXTURE2D = 42,
SVT_RWTEXTURE2DARRAY = 43,
SVT_RWTEXTURE3D = 44,
SVT_RWBUFFER = 45,
SVT_BYTEADDRESS_BUFFER = 46,
SVT_RWBYTEADDRESS_BUFFER = 47,
SVT_STRUCTURED_BUFFER = 48,
SVT_RWSTRUCTURED_BUFFER = 49,
SVT_APPEND_STRUCTURED_BUFFER = 50,
SVT_CONSUME_STRUCTURED_BUFFER = 51,
// Only used as a marker when analyzing register types
SVT_FORCED_INT = 152,
// Integer that can be either signed or unsigned. Only used as an intermediate step when doing data type analysis
SVT_INT_AMBIGUOUS = 153,
// Partial precision types. Used when doing type analysis
SVT_FLOAT10 = 53, // Seems to be used in constant buffers
SVT_FLOAT16 = 54,
SVT_INT16 = 156,
SVT_INT12 = 157,
SVT_UINT16 = 158,
// Sentinel holding the largest positive 32-bit signed value.
SVT_FORCE_DWORD = 0x7fffffff
} SHADER_VARIABLE_TYPE;
// Shape category of a shader variable (scalar, vector, matrix, object, struct, interface).
// ShaderVarType::GetMemberCount() treats SVC_STRUCT specially; every other class counts as one member.
typedef enum _SHADER_VARIABLE_CLASS
{
    SVC_SCALAR            = 0,
    SVC_VECTOR            = 1,
    SVC_MATRIX_ROWS       = 2,
    SVC_MATRIX_COLUMNS    = 3,
    SVC_OBJECT            = 4,
    SVC_STRUCT            = 5,
    SVC_INTERFACE_CLASS   = 6,
    SVC_INTERFACE_POINTER = 7,

    // Sentinel holding the largest positive 32-bit signed value.
    SVC_FORCE_DWORD       = 0x7fffffff
} SHADER_VARIABLE_CLASS;
///////////////////////////////////////
// Types
// Tessellator partitioning mode.
// Stored in ShaderInfo::eTessPartitioning and GLSLCrossDependencyData::eTessPartitioning.
// The zero value (UNDEFINED) is what value-initialization of such a field produces.
enum TESSELLATOR_PARTITIONING
{
TESSELLATOR_PARTITIONING_UNDEFINED = 0,
TESSELLATOR_PARTITIONING_INTEGER = 1,
TESSELLATOR_PARTITIONING_POW2 = 2,
TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 3,
TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4
};
// Primitive type emitted by the tessellator.
// Stored in ShaderInfo::eTessOutPrim and GLSLCrossDependencyData::eTessOutPrim.
// The zero value (UNDEFINED) is what value-initialization of such a field produces.
enum TESSELLATOR_OUTPUT_PRIMITIVE
{
TESSELLATOR_OUTPUT_UNDEFINED = 0,
TESSELLATOR_OUTPUT_POINT = 1,
TESSELLATOR_OUTPUT_LINE = 2,
TESSELLATOR_OUTPUT_TRIANGLE_CW = 3,
TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4
};
// System-value kind attached to an input/output signature element
// (see ShaderInfo::InOutSignature::eSystemValueType and
// ShaderInfo::GetOutputSignatureFromSystemValue()).
// NAME_UNDEFINED (0) is the value for elements without a system-value meaning
// - presumably ordinary user semantics; TODO confirm against the signature decoder.
enum SPECIAL_NAME
{
NAME_UNDEFINED = 0,
NAME_POSITION = 1,
NAME_CLIP_DISTANCE = 2,
NAME_CULL_DISTANCE = 3,
NAME_RENDER_TARGET_ARRAY_INDEX = 4,
NAME_VIEWPORT_ARRAY_INDEX = 5,
NAME_VERTEX_ID = 6,
NAME_PRIMITIVE_ID = 7,
NAME_INSTANCE_ID = 8,
NAME_IS_FRONT_FACE = 9,
NAME_SAMPLE_INDEX = 10,
// The following are added for D3D11
NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR = 11,
NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR = 12,
NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR = 13,
NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR = 14,
NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR = 15,
NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR = 16,
NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR = 17,
NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR = 18,
NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR = 19,
NAME_FINAL_TRI_INSIDE_TESSFACTOR = 20,
NAME_FINAL_LINE_DETAIL_TESSFACTOR = 21,
NAME_FINAL_LINE_DENSITY_TESSFACTOR = 22,
};
// Component data type of an input/output signature element
// (see ShaderInfo::InOutSignature::eComponentType).
enum INOUT_COMPONENT_TYPE {
INOUT_COMPONENT_UNKNOWN = 0,
INOUT_COMPONENT_UINT32 = 1,
INOUT_COMPONENT_SINT32 = 2,
INOUT_COMPONENT_FLOAT32 = 3
};
// Minimum-precision qualifier of an input/output signature element
// (see ShaderInfo::InOutSignature::eMinPrec).
// Values 0-5 are consecutive; the two ANY_* values sit apart at 0xf0/0xf1.
enum MIN_PRECISION {
MIN_PRECISION_DEFAULT = 0,
MIN_PRECISION_FLOAT_16 = 1,
MIN_PRECISION_FLOAT_2_8 = 2,
MIN_PRECISION_RESERVED = 3,
MIN_PRECISION_SINT_16 = 4,
MIN_PRECISION_UINT_16 = 5,
MIN_PRECISION_ANY_16 = 0xf0,
MIN_PRECISION_ANY_10 = 0xf1
};
// Kind of a bound shader resource (see ResourceBinding::eType).
// ShaderInfo::ResourceTypeToResourceGroup() maps these onto the coarser ResourceGroup.
enum ResourceType
{
    RTYPE_CBUFFER                       = 0,
    RTYPE_TBUFFER                       = 1,
    RTYPE_TEXTURE                       = 2,
    RTYPE_SAMPLER                       = 3,
    RTYPE_UAV_RWTYPED                   = 4,
    RTYPE_STRUCTURED                    = 5,
    RTYPE_UAV_RWSTRUCTURED              = 6,
    RTYPE_BYTEADDRESS                   = 7,
    RTYPE_UAV_RWBYTEADDRESS             = 8,
    RTYPE_UAV_APPEND_STRUCTURED         = 9,
    RTYPE_UAV_CONSUME_STRUCTURED        = 10,
    RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER = 11,

    // Number of resource types; must stay last so it follows the final real entry.
    RTYPE_COUNT,
};
// Coarse grouping of resource types. RGROUP_COUNT sizes the per-group
// ShaderInfo::aui32ResourceMap array.
enum ResourceGroup
{
    RGROUP_CBUFFER = 0,
    RGROUP_TEXTURE = 1,
    RGROUP_SAMPLER = 2,
    RGROUP_UAV     = 3,

    // Number of groups; must stay last so it follows the final real entry.
    RGROUP_COUNT,
};
// Dimensionality of a bound resource (see ResourceBinding::eDimension).
enum REFLECT_RESOURCE_DIMENSION
{
REFLECT_RESOURCE_DIMENSION_UNKNOWN = 0,
REFLECT_RESOURCE_DIMENSION_BUFFER = 1,
REFLECT_RESOURCE_DIMENSION_TEXTURE1D = 2,
REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY = 3,
REFLECT_RESOURCE_DIMENSION_TEXTURE2D = 4,
REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY = 5,
REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS = 6,
REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 7,
REFLECT_RESOURCE_DIMENSION_TEXTURE3D = 8,
REFLECT_RESOURCE_DIMENSION_TEXTURECUBE = 9,
REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10,
REFLECT_RESOURCE_DIMENSION_BUFFEREX = 11,
};
// Precision qualifier of a resource (see ResourceBinding::ePrecision); also the
// mapped value type of HLSLccSamplerPrecisionInfo.
// ResourceBinding::GetDataType() returns reduced-precision types for LOWP and
// MEDIUMP and full-precision types for every other value (UNKNOWN and HIGHP included).
enum REFLECT_RESOURCE_PRECISION
{
REFLECT_RESOURCE_PRECISION_UNKNOWN = 0,
REFLECT_RESOURCE_PRECISION_LOWP = 1,
REFLECT_RESOURCE_PRECISION_MEDIUMP = 2,
REFLECT_RESOURCE_PRECISION_HIGHP = 3,
};
// Element type produced when reading a resource (see ResourceBinding::ui32ReturnType).
// Note that numbering starts at 1: there is no enumerator with value 0.
// ResourceBinding::GetDataType() only distinguishes UNORM/SNORM/FLOAT, SINT, UINT and
// (at full precision) DOUBLE; all other values fall through to its float default.
enum RESOURCE_RETURN_TYPE
{
RETURN_TYPE_UNORM = 1,
RETURN_TYPE_SNORM = 2,
RETURN_TYPE_SINT = 3,
RETURN_TYPE_UINT = 4,
RETURN_TYPE_FLOAT = 5,
RETURN_TYPE_MIXED = 6,
RETURN_TYPE_DOUBLE = 7,
RETURN_TYPE_CONTINUED = 8,
RETURN_TYPE_UNUSED = 9,
};
typedef std::map<std::string, REFLECT_RESOURCE_PRECISION> HLSLccSamplerPrecisionInfo;
// Reflection record for one bound shader resource.
// No constructor is declared, so the non-string members hold indeterminate
// values until the owner assigns them.
struct ResourceBinding
{
    std::string name;
    ResourceType eType;
    uint32_t ui32BindPoint;
    uint32_t ui32BindCount;
    uint32_t ui32Flags;
    REFLECT_RESOURCE_DIMENSION eDimension;
    RESOURCE_RETURN_TYPE ui32ReturnType;
    uint32_t ui32NumSamples;
    REFLECT_RESOURCE_PRECISION ePrecision;

    // Derives the variable type of the data read from this resource from its
    // precision and return type:
    //   LOWP    : SINT -> SVT_INT16, UINT -> SVT_UINT16, anything else -> SVT_FLOAT10
    //   MEDIUMP : SINT -> SVT_INT16, UINT -> SVT_UINT16, anything else -> SVT_FLOAT16
    //   other   : SINT -> SVT_INT,   UINT -> SVT_UINT,   DOUBLE -> SVT_DOUBLE,
    //             anything else -> SVT_FLOAT
    // Unexpected return types silently take the float fallback (no assertion fires).
    SHADER_VARIABLE_TYPE GetDataType() const
    {
        const bool isLowp = (ePrecision == REFLECT_RESOURCE_PRECISION_LOWP);
        const bool isMediump = (ePrecision == REFLECT_RESOURCE_PRECISION_MEDIUMP);

        if (isLowp || isMediump)
        {
            // Both reduced precisions share the 16-bit integer types and differ
            // only in the float type they fall back to.
            if (ui32ReturnType == RETURN_TYPE_SINT)
                return SVT_INT16;
            if (ui32ReturnType == RETURN_TYPE_UINT)
                return SVT_UINT16;
            return isLowp ? SVT_FLOAT10 : SVT_FLOAT16;
        }

        // Full precision (includes UNKNOWN and HIGHP).
        if (ui32ReturnType == RETURN_TYPE_SINT)
            return SVT_INT;
        if (ui32ReturnType == RETURN_TYPE_UINT)
            return SVT_UINT;
        if (ui32ReturnType == RETURN_TYPE_DOUBLE)
            return SVT_DOUBLE;
        return SVT_FLOAT;
    }
};
struct ShaderVarType
{
ShaderVarType() :
Class(),
Type(),
Rows(),
Columns(),
Elements(),
MemberCount(),
Offset(),
ParentCount(),
Parent(),
m_IsUsed(false)
{}
SHADER_VARIABLE_CLASS Class;
SHADER_VARIABLE_TYPE Type;
uint32_t Rows;
uint32_t Columns;
uint32_t Elements;
uint32_t MemberCount;
uint32_t Offset;
std::string name;
uint32_t ParentCount;
struct ShaderVarType * Parent;
//Includes all parent names.
std::string fullName;
std::vector<struct ShaderVarType> Members;
bool m_IsUsed; // If not set, is not used in the shader code
uint32_t GetMemberCount() const
{
if (Class == SVC_STRUCT)
{
uint32_t res = 0;
std::vector<struct ShaderVarType>::const_iterator itr;
for (itr = Members.begin(); itr != Members.end(); itr++)
{
res += itr->GetMemberCount();
}
return res;
}
else
return 1;
}
};
// One variable inside a constant buffer (see ConstantBuffer::asVars).
// No constructor is declared, so haveDefaultValue, ui32StartOffset and ui32Size
// hold indeterminate values until the owner assigns them.
struct ShaderVar
{
std::string name;
// Presumably non-zero when pui32DefaultValues is populated - TODO confirm against the reflection decoder.
int haveDefaultValue;
std::vector<uint32_t> pui32DefaultValues;
//Offset/Size in bytes.
uint32_t ui32StartOffset;
uint32_t ui32Size;
// Type description; ConstantBuffer::GetMemberCount() reads sType.m_IsUsed and sType.GetMemberCount().
ShaderVarType sType;
};
struct ConstantBuffer
{
std::string name;
std::vector<ShaderVar> asVars;
uint32_t ui32TotalSizeInBytes;
uint32_t GetMemberCount(bool stripUnused) const
{
uint32_t res = 0;
std::vector<ShaderVar>::const_iterator itr;
for (itr = asVars.begin(); itr != asVars.end(); itr++)
{
if(stripUnused && !itr->sType.m_IsUsed)
continue;
res += itr->sType.GetMemberCount();
}
return res;
}
};
// Reflection record for a class type; instances are stored in ShaderInfo::psClassTypes.
// NOTE(review): the field meanings below are inferred from their names only - presumably
// they describe the constant-buffer stride and texture/sampler usage of the HLSL class; confirm
// against the reflection decoder. No constructor is declared, so the uint16_t fields
// are indeterminate until assigned.
struct ClassType
{
std::string name;
uint16_t ui16ID;
uint16_t ui16ConstBufStride;
uint16_t ui16Texture;
uint16_t ui16Sampler;
};
// Reflection record for a class instance; instances are stored in ShaderInfo::psClassInstances.
// NOTE(review): the field meanings below are inferred from their names only - confirm
// against the reflection decoder. No constructor is declared, so the uint16_t fields
// are indeterminate until assigned.
struct ClassInstance
{
std::string name;
uint16_t ui16ID;
uint16_t ui16ConstBuf;
uint16_t ui16ConstBufOffset;
uint16_t ui16Texture;
uint16_t ui16Sampler;
};
class Operand;
class ShaderInfo
{
public:
struct InOutSignature
{
std::string semanticName;
uint32_t ui32SemanticIndex;
SPECIAL_NAME eSystemValueType;
INOUT_COMPONENT_TYPE eComponentType;
uint32_t ui32Register;
uint32_t ui32Mask;
uint32_t ui32ReadWriteMask;
int iRebase; // If mask does not start from zero, this indicates the offset that needs to be subtracted from each swizzle
uint32_t ui32Stream;
MIN_PRECISION eMinPrec;
std::set<uint32_t> isIndexed; // Set of phases where this input/output is part of a index range.
std::map<uint32_t, uint32_t> indexStart; // If indexed, contains the start index for the range
std::map<uint32_t, uint32_t> index; // If indexed, contains the current index relative to the index start.
};
ShaderInfo() :
ui32MajorVersion(),
ui32MinorVersion(),
psResourceBindings(),
psConstantBuffers(),
psThisPointerConstBuffer(),
psClassTypes(),
psClassInstances()
{}
SHADER_VARIABLE_TYPE GetTextureDataType(uint32_t regNo);
int GetResourceFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ResourceBinding** ppsOutBinding) const;
void GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const;
int GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const;
int GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const;
int GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const;
int GetOutputSignatureFromRegister(const uint32_t ui32Register,
const uint32_t ui32CompMask,
const uint32_t ui32Stream,
const InOutSignature** ppsOut,
bool allowNull = false) const;
int GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const;
static ResourceGroup ResourceTypeToResourceGroup(ResourceType);
static int GetShaderVarFromOffset(const uint32_t ui32Vec4Offset,
const uint32_t (&pui32Swizzle)[4],
const ConstantBuffer* psCBuf,
const ShaderVarType** ppsShaderVar,
bool* isArray,
std::vector<uint32_t>* arrayIndices,
int32_t* pi32Rebase,
uint32_t flags);
static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector<uint32_t> &indices);
// Apply shader precision information to resource bindings
void AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info);
uint32_t ui32MajorVersion;
uint32_t ui32MinorVersion;
std::vector<InOutSignature> psInputSignatures;
std::vector<InOutSignature> psOutputSignatures;
std::vector<InOutSignature> psPatchConstantSignatures;
std::vector<ResourceBinding> psResourceBindings;
std::vector<ConstantBuffer> psConstantBuffers;
ConstantBuffer* psThisPointerConstBuffer;
std::vector<ClassType> psClassTypes;
std::vector<ClassInstance> psClassInstances;
//Func table ID to class name ID.
HLSLcc::growing_vector<uint32_t> aui32TableIDToTypeID;
HLSLcc::growing_vector<uint32_t> aui32ResourceMap[RGROUP_COUNT];
HLSLcc::growing_vector<ShaderVarType> sGroupSharedVarType;
TESSELLATOR_PARTITIONING eTessPartitioning;
TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim;
};

47
include/growing_array.h Normal file
View File

@ -0,0 +1,47 @@
#pragma once
namespace HLSLcc
{
    // A vector that automatically grows when written to, fills the intermediate ones with default value.
    // Reading from an index returns the default value if attempting to access out of bounds.
    //
    // The non-const operator[] is the "write" accessor: an out-of-range index resizes
    // the storage to (idx + 1) * 2 elements before returning a reference, so merely
    // reading through a non-const object also grows it. The const operator[] never
    // resizes. References returned by the non-const accessor are invalidated by any
    // later access that triggers a resize.
    template <class T> class growing_vector
    {
    public:
        growing_vector() : data() {}

        // Underlying storage, public so callers can inspect size or iterate.
        std::vector<T> data;

        // Growing accessor; see the class comment for the resize rule.
        T & operator[](std::size_t idx)
        {
            if (idx >= data.size())
                data.resize((idx + 1) * 2);
            return data[idx];
        }

        // Read-only accessor: out-of-range reads yield a shared default-constructed T.
        const T & operator[](std::size_t idx) const
        {
            static const T defaultValue = T();
            if (idx >= data.size())
                return defaultValue;
            return data[idx];
        }
    };

    // Same but with bool specialization
    // (std::vector<bool> hands out proxy references, so the return types differ).
    template <> class growing_vector<bool>
    {
    public:
        growing_vector() : data() {}

        std::vector<bool> data;

        // Growing accessor; new elements are filled with false.
        std::vector<bool>::reference operator[](std::size_t idx)
        {
            if (idx >= data.size())
                data.resize((idx + 1) * 2, false);
            return data[idx];
        }

        // Read-only accessor, matching the primary template's contract:
        // out-of-range reads yield false and never resize.
        bool operator[](std::size_t idx) const
        {
            if (idx >= data.size())
                return false;
            return data[idx];
        }
    };
} // namespace HLSLcc

454
include/hlslcc.h Normal file
View File

@ -0,0 +1,454 @@
#ifndef HLSLCC_H_
#define HLSLCC_H_
#include <string>
#include <vector>
#include <map>
#if defined (_WIN32) && defined(HLSLCC_DYNLIB)
#define HLSLCC_APIENTRY __stdcall
#if defined(libHLSLcc_EXPORTS)
#define HLSLCC_API __declspec(dllexport)
#else
#define HLSLCC_API __declspec(dllimport)
#endif
#else
#define HLSLCC_APIENTRY
#define HLSLCC_API
#endif
#include <stdint.h>
#include <stdio.h>
#include <string.h>
// Target output language / version selector passed to the Translate* entry points.
// The *_FIRST / *_LAST aliases bracket the GLSL ES and desktop GLSL ranges so that
// callers can do range checks; they share the value of the enumerator they alias.
typedef enum
{
    // Depends on the HLSL shader model.
    LANG_DEFAULT,

    LANG_ES_100,
    LANG_ES_FIRST = LANG_ES_100,
    LANG_ES_300,
    LANG_ES_310,
    LANG_ES_LAST = LANG_ES_310,

    LANG_120,
    LANG_GL_FIRST = LANG_120,
    LANG_130,
    LANG_140,
    LANG_150,
    LANG_330,
    LANG_400,
    LANG_410,
    LANG_420,
    LANG_430,
    LANG_440,
    LANG_GL_LAST = LANG_440,

    LANG_METAL,
} GLLang;
// One-bit flags, one per named GL extension; passed by pointer to the Translate*
// entry points as `extensions`.
// NOTE(review): presumably a set bit means the extension may be used in the
// generated GLSL - confirm against the translator.
typedef struct GlExtensions {
uint32_t ARB_explicit_attrib_location : 1;
uint32_t ARB_explicit_uniform_location : 1;
uint32_t ARB_shading_language_420pack : 1;
}GlExtensions;
#include "ShaderInfo.h"
typedef std::vector<std::string> TextureSamplerPairs;
// Interpolation qualifier of a pixel shader input register.
// GLSLCrossDependencyData stores one of these per register
// (SetInterpolationMode / GetInterpolationMode) and reports
// INTERPOLATION_UNDEFINED for registers that were never set.
typedef enum INTERPOLATION_MODE
{
INTERPOLATION_UNDEFINED = 0,
INTERPOLATION_CONSTANT = 1,
INTERPOLATION_LINEAR = 2,
INTERPOLATION_LINEAR_CENTROID = 3,
INTERPOLATION_LINEAR_NOPERSPECTIVE = 4,
INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID = 5,
INTERPOLATION_LINEAR_SAMPLE = 6,
INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE = 7,
} INTERPOLATION_MODE;
// Program-stage bits: which shader stages a program contains. They are combined
// into GLSLCrossDependencyData::ui32ProgramStages, which GetVaryingNamespace()
// tests for the DOMAIN and GEOMETRY bits.
#define PS_FLAG_VERTEX_SHADER 0x1
#define PS_FLAG_HULL_SHADER 0x2
#define PS_FLAG_DOMAIN_SHADER 0x4
#define PS_FLAG_GEOMETRY_SHADER 0x8
#define PS_FLAG_PIXEL_SHADER 0x10
// Operand translation flags. Each occupies a distinct bit so they can be OR-ed together.
// NOTE(review): their consumers are not part of this header.
#define TO_FLAG_NONE 0x0
#define TO_FLAG_INTEGER 0x1
#define TO_FLAG_NAME_ONLY 0x2
#define TO_FLAG_DECLARATION_NAME 0x4
#define TO_FLAG_DESTINATION 0x8 //Operand is being written to by assignment.
#define TO_FLAG_UNSIGNED_INTEGER 0x10
#define TO_FLAG_DOUBLE 0x20
// --- TO_AUTO_BITCAST_TO_FLOAT ---
//If the operand is an integer temp variable then this flag
//indicates that the temp has a valid floating point encoding
//and that the current expression expects the operand to be floating point
//and therefore intBitsToFloat must be applied to that variable.
#define TO_AUTO_BITCAST_TO_FLOAT 0x40
#define TO_AUTO_BITCAST_TO_INT 0x80
#define TO_AUTO_BITCAST_TO_UINT 0x100
// AUTO_EXPAND flags automatically expand the operand to at least (i/u)vecX
// to match HLSL functionality.
#define TO_AUTO_EXPAND_TO_VEC2 0x200
#define TO_AUTO_EXPAND_TO_VEC3 0x400
#define TO_AUTO_EXPAND_TO_VEC4 0x800
#define TO_FLAG_BOOL 0x1000
// These flags are only used for Metal:
// Force downscaling of the operand to match
// the other operand (Metal doesn't like mixing halfs with floats)
#define TO_FLAG_FORCE_HALF 0x2000
// Shader stage identifier. INVALID_SHADER is -1, so the valid stages start at 0.
typedef enum
{
    INVALID_SHADER  = -1,
    PIXEL_SHADER    = 0,
    VERTEX_SHADER   = 1,
    GEOMETRY_SHADER = 2,
    HULL_SHADER     = 3,
    DOMAIN_SHADER   = 4,
    COMPUTE_SHADER  = 5,
} SHADER_TYPE;
// Enum for texture dimension reflection data
// Texture dimension reported through HLSLccReflection::OnTextureBinding().
typedef enum
{
    TD_FLOAT     = 0,
    TD_INT       = 1,
    TD_2D        = 2,
    TD_3D        = 3,
    TD_CUBE      = 4,
    TD_2DSHADOW  = 5,
    TD_2DARRAY   = 6,
    TD_CUBEARRAY = 7
} HLSLCC_TEX_DIMENSION;
// The prefix for all temporary variables used by the generated code.
// Using a texture or uniform name like this will cause conflicts
#define HLSLCC_TEMP_PREFIX "u_xlat"
//The shader stages (Vertex, Pixel et al) do not depend on each other
//in HLSL. GLSL is a different story. HLSLCrossCompiler requires
//that hull shaders must be compiled before domain shaders, and
//the pixel shader must be compiled before all of the others.
//During compilation the GLSLCrossDependencyData struct will
//carry over any information needed about a different shader stage
//in order to construct valid GLSL shader combinations.
//Using GLSLCrossDependencyData is optional. However some shader
//combinations may show link failures, or runtime errors.
// State carried between the compilations of the different stages of one program:
// pixel shader interpolation modes, varying locations, Vulkan resource bindings,
// tessellation settings and the set of stages the program contains.
class GLSLCrossDependencyData
{
public:
    // A container for a single Vulkan resource binding (<set, binding> pair)
    typedef std::pair<uint32_t, uint32_t> VulkanResourceBinding;

private:
    //Required if PixelInterpDependency is true
    std::vector<INTERPOLATION_MODE> pixelInterpolation;

    // Map of varying locations, indexed by varying names.
    typedef std::map<std::string, uint32_t> VaryingLocations;

    static const int MAX_NAMESPACES = 6; // Max namespaces: vert input, hull input, domain input, geom input, ps input, (ps output)

    VaryingLocations varyingLocationsMap[MAX_NAMESPACES];
    uint32_t nextAvailableVaryingLocation[MAX_NAMESPACES];

    typedef std::map<std::string, VulkanResourceBinding> VulkanResourceBindings;
    VulkanResourceBindings m_VulkanResourceBindings;
    uint32_t m_NextAvailableVulkanResourceBinding[8]; // one per set.

    // Maps (stage, input/output) to the index of the varying namespace it uses.
    // The output of one stage and the input of the next share a namespace, so
    // both sides resolve a varying name to the same location. Which namespace a
    // geometry or pixel shader reads from depends on the stages recorded in
    // ui32ProgramStages. Unlisted stages (e.g. compute) map to namespace 0.
    inline int GetVaryingNamespace(SHADER_TYPE eShaderType, bool isInput) const
    {
        switch (eShaderType)
        {
            case VERTEX_SHADER:
                return isInput ? 0 : 1;

            case HULL_SHADER:
                return isInput ? 1 : 2;

            case DOMAIN_SHADER:
                return isInput ? 2 : 3;

            case GEOMETRY_SHADER:
                // The input depends on whether there's a tessellation shader before us
                if (isInput)
                {
                    return (ui32ProgramStages & PS_FLAG_DOMAIN_SHADER) ? 3 : 1;
                }
                return 4;

            case PIXEL_SHADER:
                // The inputs can come from geom shader, domain shader or directly from vertex shader
                if (isInput)
                {
                    if (ui32ProgramStages & PS_FLAG_GEOMETRY_SHADER)
                    {
                        return 4;
                    }
                    else if (ui32ProgramStages & PS_FLAG_DOMAIN_SHADER)
                    {
                        return 3;
                    }
                    else
                    {
                        return 1;
                    }
                }
                return 5; // This value never really used

            default:
                return 0;
        }
    }

public:
    // Starts with no stages, undefined tessellation state and all location /
    // binding counters at zero.
    GLSLCrossDependencyData()
        : eTessPartitioning(),
        eTessOutPrim(),
        ui32ProgramStages(0)
    {
        memset(nextAvailableVaryingLocation, 0, sizeof(nextAvailableVaryingLocation));
        memset(m_NextAvailableVulkanResourceBinding, 0, sizeof(m_NextAvailableVulkanResourceBinding));
    }

    // Retrieve the location for a varying with a given name.
    // If the name doesn't already have an allocated location, allocate one
    // and store it into the map.
    inline uint32_t GetVaryingLocation(const std::string &name, SHADER_TYPE eShaderType, bool isInput)
    {
        int nspace = GetVaryingNamespace(eShaderType, isInput);
        VaryingLocations::iterator itr = varyingLocationsMap[nspace].find(name);
        if (itr != varyingLocationsMap[nspace].end())
            return itr->second;

        uint32_t newKey = nextAvailableVaryingLocation[nspace];
        nextAvailableVaryingLocation[nspace]++;
        varyingLocationsMap[nspace].insert(std::make_pair(name, newKey));
        return newKey;
    }

    // Retrieve the binding for a resource (texture, constant buffer, image) with a given name
    // If not found, allocate a new one (in preferredSet, set 0 by default) and return that
    // The returned value is a pair of <set, binding>
    // If the name contains the marker "Xhlslcc_set_<set>_bind_<binding>X", those values (from the first
    // found occurrence in the name) will be used instead, and all occurrences of the marker will be
    // removed from name, so the name parameter can be modified.
    // If allocRoomForCounter is true, the following binding number in the same set will be allocated with name + '_counter'
    //
    // NOTE(review): preferredSet indexes m_NextAvailableVulkanResourceBinding (8 entries) without a
    // range check; callers must pass a value below 8.
    // NOTE(review): explicit marker bindings do not advance the per-set counters, so a later
    // automatic allocation may hand out the same <set, binding> pair.
    inline std::pair<uint32_t, uint32_t> GetVulkanResourceBinding(std::string &name, bool allocRoomForCounter = false, uint32_t preferredSet = 0)
    {
        // scan for the special marker
        // %u (not %d): the parsed values are unsigned. They are read into plain
        // unsigned ints, which is exactly the pointer type %u is specified to
        // write through, and converted to uint32_t afterwards.
        const char *marker = "Xhlslcc_set_%u_bind_%uX";
        unsigned int parsedSet = 0, parsedBinding = 0;
        size_t startLoc = name.find("Xhlslcc");
        if ((startLoc != std::string::npos) && (sscanf(name.c_str() + startLoc, marker, &parsedSet, &parsedBinding) == 2))
        {
            // Get rid of all markers
            while ((startLoc = name.find("Xhlslcc")) != std::string::npos)
            {
                // The marker ends at the next 'X'; stop if it is unterminated.
                size_t endLoc = name.find('X', startLoc + 1);
                if (endLoc == std::string::npos)
                    break;
                name.erase(startLoc, endLoc - startLoc + 1);
            }
            // Add to map (insert keeps any binding already registered under this name)
            VulkanResourceBinding newBind(static_cast<uint32_t>(parsedSet), static_cast<uint32_t>(parsedBinding));
            m_VulkanResourceBindings.insert(std::make_pair(name, newBind));
            if (allocRoomForCounter)
            {
                VulkanResourceBinding counterBind(newBind.first, newBind.second + 1);
                m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind));
            }

            return newBind;
        }

        VulkanResourceBindings::iterator itr = m_VulkanResourceBindings.find(name);
        if (itr != m_VulkanResourceBindings.end())
            return itr->second;

        // Allocate a new one
        VulkanResourceBinding newBind = std::make_pair(preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet]);
        m_NextAvailableVulkanResourceBinding[preferredSet]++;
        m_VulkanResourceBindings.insert(std::make_pair(name, newBind));
        if (allocRoomForCounter)
        {
            VulkanResourceBinding counterBind = std::make_pair(preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet]);
            m_NextAvailableVulkanResourceBinding[preferredSet]++;
            m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind));
        }
        return newBind;
    }

    //dcl_tessellator_partitioning and dcl_tessellator_output_primitive appear in hull shader for D3D,
    //but they appear on inputs inside domain shaders for GL.
    //Hull shader must be compiled before domain so that the
    //correct partitioning and primitive type information
    //can be saved when compiling hull and passed to domain compilation.
    TESSELLATOR_PARTITIONING eTessPartitioning;
    TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim;

    // Bitfield for the shader stages this program is going to include (see PS_FLAG_*).
    // Needed so we can construct proper shader input and output names
    uint32_t ui32ProgramStages;

    // Interpolation mode recorded for a pixel shader input register, or
    // INTERPOLATION_UNDEFINED if none was recorded. Never grows the storage.
    inline INTERPOLATION_MODE GetInterpolationMode(uint32_t regNo) const
    {
        if (regNo >= pixelInterpolation.size())
            return INTERPOLATION_UNDEFINED;
        else
            return pixelInterpolation[regNo];
    }

    // Records the interpolation mode for a register, growing the storage to
    // (regNo + 1) * 2 entries (filled with INTERPOLATION_UNDEFINED) when needed.
    inline void SetInterpolationMode(uint32_t regNo, INTERPOLATION_MODE mode)
    {
        if (regNo >= pixelInterpolation.size())
            pixelInterpolation.resize((regNo + 1) * 2, INTERPOLATION_UNDEFINED);

        pixelInterpolation[regNo] = mode;
    }

    // Resets the interpolation modes and all varying locations.
    // The Vulkan resource bindings, the tessellation state and ui32ProgramStages
    // are left untouched.
    inline void ClearCrossDependencyData()
    {
        pixelInterpolation.clear();
        for (int i = 0; i < MAX_NAMESPACES; i++)
        {
            varyingLocationsMap[i].clear();
            nextAvailableVaryingLocation[i] = 0;
        }
    }
};
// Result of a translation; the Translate* entry points receive a pointer to one as `result`.
// No constructor is declared, so shaderType and GLSLLanguage are indeterminate
// until they are assigned.
struct GLSLShader
{
int shaderType; //One of the GL enums.
// Generated shader source text.
std::string sourceCode;
// Reflection data (signatures, bindings, constant buffers) for the shader.
ShaderInfo reflection;
GLLang GLSLLanguage;
TextureSamplerPairs textureSamplers; // HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS fills this out
};
// Interface for retrieving reflection and diagnostics data
// Interface for retrieving reflection and diagnostics data
// Passed by reference to the Translate* entry points. Every callback has a
// default body that does nothing (the bool-returning ones return true), so a
// subclass only needs to override the notifications it is interested in.
// NOTE(review): the meaning of the bool results of OnConstantBuffer/OnConstant is
// not visible in this header - confirm against the translator.
class HLSLccReflection
{
public:
HLSLccReflection() {}
virtual ~HLSLccReflection() {}
// Called on errors or diagnostic messages
virtual void OnDiagnostics(const std::string &error, int line, bool isError) {}
virtual void OnInputBinding(const std::string &name, int bindIndex) {}
virtual bool OnConstantBuffer(const std::string &name, size_t bufferSize, size_t memberCount) { return true; }
virtual bool OnConstant(const std::string &name, int bindIndex, SHADER_VARIABLE_TYPE cType, int rows, int cols, bool isMatrix, int arraySize) { return true; }
virtual void OnConstantBufferBinding(const std::string &name, int bindIndex) {}
virtual void OnTextureBinding(const std::string &name, int bindIndex, HLSLCC_TEX_DIMENSION dim, bool isUAV) {}
virtual void OnBufferBinding(const std::string &name, int bindIndex, bool isUAV) {}
virtual void OnThreadGroupSize(unsigned int xSize, unsigned int ySize, unsigned int zSize) {}
};
// Translation option bits for the `flags` argument of the Translate* entry points.
// Each flag occupies a distinct bit, so they can be OR-ed together.
/*HLSL constant buffers are treated as default-block uniform arrays by default. This is done
to support versions of GLSL which lack ARB_uniform_buffer_object functionality.
Setting this flag causes each one to have its own uniform block.
Note: Currently the nth const buffer will be named UnformBufferN. This is likely to change to the original HLSL name in the future.*/
static const unsigned int HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT = 0x1;
static const unsigned int HLSLCC_FLAG_ORIGIN_UPPER_LEFT = 0x2;
static const unsigned int HLSLCC_FLAG_PIXEL_CENTER_INTEGER = 0x4;
static const unsigned int HLSLCC_FLAG_GLOBAL_CONSTS_NEVER_IN_UBO = 0x8;
//GS enabled?
//Affects vertex shader (i.e. need to compile vertex shader again to use with/without GS).
//This flag is needed in order for the interfaces between stages to match when GS is in use.
//PS inputs VtxGeoOutput
//GS outputs VtxGeoOutput
//Vs outputs VtxOutput if GS enabled. VtxGeoOutput otherwise.
static const unsigned int HLSLCC_FLAG_GS_ENABLED = 0x10;
static const unsigned int HLSLCC_FLAG_TESS_ENABLED = 0x20;
//Either use this flag or glBindFragDataLocationIndexed.
//When set the first pixel shader output is the first input to blend
//equation, the others go to the second input.
static const unsigned int HLSLCC_FLAG_DUAL_SOURCE_BLENDING = 0x40;
//If set, shader inputs and outputs are declared with their semantic name.
static const unsigned int HLSLCC_FLAG_INOUT_SEMANTIC_NAMES = 0x80;
//If set, shader inputs and outputs are declared with their semantic name appended.
static const unsigned int HLSLCC_FLAG_INOUT_APPEND_SEMANTIC_NAMES = 0x100;
//If set, combines texture/sampler pairs used together into samplers named "texturename_X_samplername".
static const unsigned int HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS = 0x200;
//If set, attribute and uniform explicit location qualifiers are disabled (even if the language version supports that)
static const unsigned int HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS = 0x400;
//If set, global uniforms are not stored in a struct.
static const unsigned int HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT = 0x800;
//If set, image declarations will always have binding and format qualifiers.
static const unsigned int HLSLCC_FLAG_GLES31_IMAGE_QUALIFIERS = 0x1000;
// If set, treats sampler names ending with _highp, _mediump, and _lowp as sampler precision qualifiers
// Also removes that suffix from generated output
static const unsigned int HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME = 0x2000;
// If set, adds location qualifiers to intra-shader varyings.
static const unsigned int HLSLCC_FLAG_SEPARABLE_SHADER_OBJECTS = 0x4000;
// If set, wraps all uniform buffer declarations in a preprocessor macro #ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS
// so that if that macro is defined, all UBO declarations will become normal uniforms
static const unsigned int HLSLCC_FLAG_WRAP_UBO = 0x8000;
// If set, skips all members of the $Globals constant buffer struct that are not referenced in the shader code
static const unsigned int HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS = 0x10000;
#define HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "hlslcc_mtx%dx%d"
// If set, translates all matrix declarations into vec4 arrays (as the DX bytecode treats them), and prefixes the name with 'hlslcc_mtx<rows>x<cols>'
static const unsigned int HLSLCC_FLAG_TRANSLATE_MATRICES = 0x20000;
// If set, emits Vulkan-style (set, binding) bindings, also captures that info from any declaration named "<Name>_hlslcc_set_X_bind_Y"
// NOTE(review): GLSLCrossDependencyData::GetVulkanResourceBinding() actually scans names for the marker
// "Xhlslcc_set_<set>_bind_<binding>X" (with a leading and trailing 'X'), which differs from the form quoted above.
// Unless bindings are given explicitly, they are allocated into set 0 (map stored in GLSLCrossDependencyData)
static const unsigned int HLSLCC_FLAG_VULKAN_BINDINGS = 0x40000;
// If set, metal output will use linear sampler for shadow compares, otherwise point sampler.
static const unsigned int HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR = 0x80000;
// Public entry points, exported with C linkage.
// Both take the same arguments apart from the shader source: TranslateHLSLFromFile
// is given a file name, TranslateHLSLFromMem a pointer to the shader in memory.
//   flags               - combination of the HLSLCC_FLAG_* bits
//   language            - target language/version (GLLang)
//   extensions          - available GL extensions
//   dependencies        - cross-stage state shared between the shaders of one program
//   samplerPrecisions   - per-sampler precision information, keyed by name
//   reflectionCallbacks - receives reflection and diagnostics notifications
//   result              - receives the translated shader
// NOTE(review): the meaning of the int return value is not visible in this header -
// confirm against the implementation.
// Although guarded by __cplusplus, the signatures use C++ references and classes,
// so these declarations can only be consumed from C++.
#ifdef __cplusplus
extern "C" {
#endif
HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename,
unsigned int flags,
GLLang language,
const GlExtensions *extensions,
GLSLCrossDependencyData* dependencies,
HLSLccSamplerPrecisionInfo& samplerPrecisions,
HLSLccReflection& reflectionCallbacks,
GLSLShader* result
);
HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader,
unsigned int flags,
GLLang language,
const GlExtensions *extensions,
GLSLCrossDependencyData* dependencies,
HLSLccSamplerPrecisionInfo& samplerPrecisions,
HLSLccReflection& reflectionCallbacks,
GLSLShader* result);
#ifdef __cplusplus
}
#endif
#endif

5
include/hlslcc.hpp Normal file
View File

@ -0,0 +1,5 @@
// C++ convenience wrapper around hlslcc.h.
//
// hlslcc.h is itself a C++ header: it includes <string>, <vector> and <map>,
// pulls in class templates (HLSLcc::growing_vector) and already wraps its exported
// functions in extern "C". Including it from inside an extern "C" block would
// give those templates C linkage, which is ill-formed whenever this file is the
// first to include them, so the header is included directly.
#include "hlslcc.h"

800
include/pstdint.h Normal file
View File

@ -0,0 +1,800 @@
/* A portable stdint.h
****************************************************************************
* BSD License:
****************************************************************************
*
* Copyright (c) 2005-2011 Paul Hsieh
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************
*
* Version 0.1.12
*
* The ANSI C standard committee, for the C99 standard, specified the
* inclusion of a new standard include file called stdint.h. This is
* a very useful and long desired include file which contains several
* very precise definitions for integer scalar types that is
* critically important for making portable several classes of
* applications including cryptography, hashing, variable length
* integer libraries and so on. But for most developers it's likely
* useful just for programming sanity.
*
* The problem is that most compiler vendors have decided not to
* implement the C99 standard, and the next C++ language standard
* (which has a lot more mindshare these days) will be a long time in
* coming and it's unknown whether or not it will include stdint.h or
* how much adoption it will have. Either way, it will be a long time
* before all compilers come with a stdint.h and it also does nothing
* for the extremely large number of compilers available today which
* do not include this file, or anything comparable to it.
*
* So that's what this file is all about. It's an attempt to build a
* single universal include file that works on as many platforms as
* possible to deliver what stdint.h is supposed to. A few things
* that should be noted about this file:
*
* 1) It is not guaranteed to be portable and/or present an identical
* interface on all platforms. The extreme variability of the
* ANSI C standard makes this an impossibility right from the
* very get go. It's really only meant to be useful for the vast
* majority of platforms that possess the capability of
* implementing usefully and precisely defined, standard sized
* integer scalars. Systems which are not intrinsically 2s
* complement may produce invalid constants.
*
* 2) There is an unavoidable use of non-reserved symbols.
*
* 3) Other standard include files are invoked.
*
* 4) This file may come in conflict with future platforms that do
* include stdint.h. The hope is that one or the other can be
* used with no real difference.
*
* 5) In the current version, if your platform can't represent
* int32_t, int16_t and int8_t, it just dumps out with a compiler
* error.
*
* 6) 64 bit integers may or may not be defined. Test for their
* presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX.
* Note that this is different from the C99 specification which
* requires the existence of 64 bit support in the compiler. If
* this is not defined for your platform, yet it is capable of
* dealing with 64 bits then it is because this file has not yet
* been extended to cover all of your system's capabilities.
*
* 7) (u)intptr_t may or may not be defined. Test for its presence
* with the test: #ifdef PTRDIFF_MAX. If this is not defined
* for your platform, then it is because this file has not yet
* been extended to cover all of your system's capabilities, not
* because its optional.
*
* 8) The following might not be defined even if your platform is
* capable of defining it:
*
* WCHAR_MIN
* WCHAR_MAX
* (u)int64_t
* PTRDIFF_MIN
* PTRDIFF_MAX
* (u)intptr_t
*
* 9) The following have not been defined:
*
* WINT_MIN
* WINT_MAX
*
* 10) The criteria for defining (u)int_least(*)_t isn't clear,
* except for systems which don't have a type that precisely
* defined 8, 16, or 32 bit types (which this include file does
* not support anyways). Default definitions have been given.
*
* 11) The criteria for defining (u)int_fast(*)_t isn't something I
* would trust to any particular compiler vendor or the ANSI C
* committee. It is well known that "compatible systems" are
* commonly created that have very different performance
* characteristics from the systems they are compatible with,
* especially those whose vendors make both the compiler and the
* system. Default definitions have been given, but it's strongly
* recommended that users never use these definitions for any
* reason (they do *NOT* deliver any serious guarantee of
* improved performance -- not in this file, nor any vendor's
* stdint.h).
*
* 12) The following macros:
*
* PRINTF_INTMAX_MODIFIER
* PRINTF_INT64_MODIFIER
* PRINTF_INT32_MODIFIER
* PRINTF_INT16_MODIFIER
* PRINTF_LEAST64_MODIFIER
* PRINTF_LEAST32_MODIFIER
* PRINTF_LEAST16_MODIFIER
* PRINTF_INTPTR_MODIFIER
*
* are strings which have been defined as the modifiers required
* for the "d", "u" and "x" printf formats to correctly output
* (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t,
* (u)least32_t, (u)least16_t and (u)intptr_t types respectively.
* PRINTF_INTPTR_MODIFIER is not defined for some systems which
* provide their own stdint.h. PRINTF_INT64_MODIFIER is not
* defined if INT64_MAX is not defined. These are an extension
* beyond what C99 specifies must be in stdint.h.
*
* In addition, the following macros are defined:
*
* PRINTF_INTMAX_HEX_WIDTH
* PRINTF_INT64_HEX_WIDTH
* PRINTF_INT32_HEX_WIDTH
* PRINTF_INT16_HEX_WIDTH
* PRINTF_INT8_HEX_WIDTH
* PRINTF_INTMAX_DEC_WIDTH
* PRINTF_INT64_DEC_WIDTH
* PRINTF_INT32_DEC_WIDTH
* PRINTF_INT16_DEC_WIDTH
* PRINTF_INT8_DEC_WIDTH
*
* Which specifies the maximum number of characters required to
* print the number of that type in either hexadecimal or decimal.
* These are an extension beyond what C99 specifies must be in
* stdint.h.
*
* Compilers tested (all with 0 warnings at their highest respective
* settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32
* bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio
* .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3
*
* This file should be considered a work in progress. Suggestions for
* improvements, especially those which increase coverage are strongly
* encouraged.
*
* Acknowledgements
*
* The following people have made significant contributions to the
* development and testing of this file:
*
* Chris Howie
* John Steele Scott
* Dave Thorup
* John Dill
*
*/
#include <stddef.h>
#include <limits.h>
#include <signal.h>
/*
 * For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and
 * do nothing else. On the Mac OS X version of gcc this is _STDINT_H_.
 *
 * More generally: when the toolchain is known to ship its own <stdint.h>
 * (C99 mode, recent Open Watcom, or a gcc-compatible compiler), include it,
 * mark this header as done, and only add the pstdint-specific PRINTF_*
 * extension macros that the system header does not provide.
 */
#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined (__UINT_FAST64_TYPE__)) )) && !defined (_PSTDINT_H_INCLUDED)
#include <stdint.h>
#define _PSTDINT_H_INCLUDED
# ifndef PRINTF_INT64_MODIFIER
#  define PRINTF_INT64_MODIFIER "ll"
# endif
/*
 * The system int32_t is plain int wherever int is 32 bits wide, in which
 * case the printf length modifier must be empty: "l" would make printf
 * read a long, which is 64 bits on LP64 targets. Only fall back to "l"
 * when int is not a 32 bit type (or the limits are not visible).
 */
# ifndef PRINTF_INT32_MODIFIER
#  if defined (UINT_MAX) && defined (UINT32_MAX) && (UINT_MAX == UINT32_MAX)
#   define PRINTF_INT32_MODIFIER ""
#  else
#   define PRINTF_INT32_MODIFIER "l"
#  endif
# endif
# ifndef PRINTF_INT16_MODIFIER
#  define PRINTF_INT16_MODIFIER "h"
# endif
# ifndef PRINTF_INTMAX_MODIFIER
#  define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
# endif
/* Maximum number of hexadecimal digits needed per type. */
# ifndef PRINTF_INT64_HEX_WIDTH
#  define PRINTF_INT64_HEX_WIDTH "16"
# endif
# ifndef PRINTF_INT32_HEX_WIDTH
#  define PRINTF_INT32_HEX_WIDTH "8"
# endif
# ifndef PRINTF_INT16_HEX_WIDTH
#  define PRINTF_INT16_HEX_WIDTH "4"
# endif
# ifndef PRINTF_INT8_HEX_WIDTH
#  define PRINTF_INT8_HEX_WIDTH "2"
# endif
/* Maximum number of decimal digits needed per type. */
# ifndef PRINTF_INT64_DEC_WIDTH
#  define PRINTF_INT64_DEC_WIDTH "20"
# endif
# ifndef PRINTF_INT32_DEC_WIDTH
#  define PRINTF_INT32_DEC_WIDTH "10"
# endif
# ifndef PRINTF_INT16_DEC_WIDTH
#  define PRINTF_INT16_DEC_WIDTH "5"
# endif
# ifndef PRINTF_INT8_DEC_WIDTH
#  define PRINTF_INT8_DEC_WIDTH "3"
# endif
# ifndef PRINTF_INTMAX_HEX_WIDTH
#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
# endif
# ifndef PRINTF_INTMAX_DEC_WIDTH
#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
# endif
/*
 * Something really weird is going on with Open Watcom. Just pull some of
 * these duplicated definitions from Open Watcom's stdint.h file for now.
 */
# if defined (__WATCOMC__) && __WATCOMC__ >= 1250
#  if !defined (INT64_C)
#   define INT64_C(x) (x + (INT64_MAX - INT64_MAX))
#  endif
#  if !defined (UINT64_C)
#   define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX))
#  endif
#  if !defined (INT32_C)
#   define INT32_C(x) (x + (INT32_MAX - INT32_MAX))
#  endif
#  if !defined (UINT32_C)
#   define UINT32_C(x) (x + (UINT32_MAX - UINT32_MAX))
#  endif
#  if !defined (INT16_C)
#   define INT16_C(x) (x)
#  endif
#  if !defined (UINT16_C)
#   define UINT16_C(x) (x)
#  endif
#  if !defined (INT8_C)
#   define INT8_C(x) (x)
#  endif
#  if !defined (UINT8_C)
#   define UINT8_C(x) (x)
#  endif
#  if !defined (UINT64_MAX)
#   define UINT64_MAX 18446744073709551615ULL
#  endif
#  if !defined (INT64_MAX)
#   define INT64_MAX 9223372036854775807LL
#  endif
#  if !defined (UINT32_MAX)
#   define UINT32_MAX 4294967295UL
#  endif
#  if !defined (INT32_MAX)
#   define INT32_MAX 2147483647L
#  endif
#  if !defined (INTMAX_MAX)
#   define INTMAX_MAX INT64_MAX
#  endif
#  if !defined (INTMAX_MIN)
#   define INTMAX_MIN INT64_MIN
#  endif
# endif
#endif
#ifndef _PSTDINT_H_INCLUDED
#define _PSTDINT_H_INCLUDED
/* Fallback SIZE_MAX: every bit of size_t set. */
#ifndef SIZE_MAX
# define SIZE_MAX (~(size_t)0)
#endif
/*
* Deduce the type assignments from limits.h under the assumption that
* integer sizes in bits are powers of 2, and follow the ANSI
* definitions.
*/
#ifndef UINT8_MAX
# define UINT8_MAX 0xff
#endif
/*
* uint8_t is introduced below as a typedef, so this #ifndef only skips the
* block when some other header has #defined uint8_t as a macro.
*/
#ifndef uint8_t
# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S)
typedef unsigned char uint8_t;
# define UINT8_C(v) ((uint8_t) v)
# else
# error "Platform not supported"
# endif
#endif
#ifndef INT8_MAX
# define INT8_MAX 0x7f
#endif
/*
* With the INT8_C defined below this expands to ((int8_t) 0x80), i.e. a cast
* expression. NOTE(review): a cast cannot be evaluated in #if directives.
*/
#ifndef INT8_MIN
# define INT8_MIN INT8_C(0x80)
#endif
#ifndef int8_t
# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S)
typedef signed char int8_t;
# define INT8_C(v) ((int8_t) v)
# else
# error "Platform not supported"
# endif
#endif
/*
* 16 bit exact-width types. int is tried first, then short; whichever matches
* also selects the printf length modifier ("" for int, "h" for short) unless
* PRINTF_INT16_MODIFIER has already been defined.
*/
#ifndef UINT16_MAX
# define UINT16_MAX 0xffff
#endif
#ifndef uint16_t
#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S)
typedef unsigned int uint16_t;
# ifndef PRINTF_INT16_MODIFIER
# define PRINTF_INT16_MODIFIER ""
# endif
# define UINT16_C(v) ((uint16_t) (v))
#elif (USHRT_MAX == UINT16_MAX)
typedef unsigned short uint16_t;
# define UINT16_C(v) ((uint16_t) (v))
# ifndef PRINTF_INT16_MODIFIER
# define PRINTF_INT16_MODIFIER "h"
# endif
#else
#error "Platform not supported"
#endif
#endif
#ifndef INT16_MAX
# define INT16_MAX 0x7fff
#endif
/* With the INT16_C defined below this expands to ((int16_t) (0x8000)), a cast expression. */
#ifndef INT16_MIN
# define INT16_MIN INT16_C(0x8000)
#endif
#ifndef int16_t
#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S)
typedef signed int int16_t;
# define INT16_C(v) ((int16_t) (v))
# ifndef PRINTF_INT16_MODIFIER
# define PRINTF_INT16_MODIFIER ""
# endif
#elif (SHRT_MAX == INT16_MAX)
typedef signed short int16_t;
# define INT16_C(v) ((int16_t) (v))
# ifndef PRINTF_INT16_MODIFIER
# define PRINTF_INT16_MODIFIER "h"
# endif
#else
#error "Platform not supported"
#endif
#endif
/*
 * 32 bit exact-width types. long is tried first, then int, then short; the
 * matching branch also provides the constant macro and, unless already
 * defined, the printf length modifier for that type.
 */
#ifndef UINT32_MAX
# define UINT32_MAX (0xffffffffUL)
#endif
#ifndef uint32_t
# if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S)
  typedef unsigned long uint32_t;
#  define UINT32_C(v) v ## UL
#  ifndef PRINTF_INT32_MODIFIER
#   define PRINTF_INT32_MODIFIER "l"
#  endif
# elif (UINT_MAX == UINT32_MAX)
  typedef unsigned int uint32_t;
#  ifndef PRINTF_INT32_MODIFIER
#   define PRINTF_INT32_MODIFIER ""
#  endif
#  define UINT32_C(v) v ## U
# elif (USHRT_MAX == UINT32_MAX)
  typedef unsigned short uint32_t;
#  define UINT32_C(v) ((unsigned short) (v))
#  ifndef PRINTF_INT32_MODIFIER
#   define PRINTF_INT32_MODIFIER ""
#  endif
# else
#  error "Platform not supported"
# endif
#endif
#ifndef INT32_MAX
# define INT32_MAX (0x7fffffffL)
#endif
/*
 * The minimum is derived from INT32_MAX instead of being spelled as the
 * literal 0x80000000: that literal does not fit a signed 32 bit type, so the
 * compiler gives it an unsigned type and the "minimum" would be a large
 * positive number. This form is also a plain integer constant expression, so
 * it can be used in #if directives.
 */
#ifndef INT32_MIN
# define INT32_MIN (-INT32_MAX - 1)
#endif
#ifndef int32_t
# if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S)
  typedef signed long int32_t;
#  define INT32_C(v) v ## L
#  ifndef PRINTF_INT32_MODIFIER
#   define PRINTF_INT32_MODIFIER "l"
#  endif
# elif (INT_MAX == INT32_MAX)
  typedef signed int int32_t;
#  define INT32_C(v) v
#  ifndef PRINTF_INT32_MODIFIER
#   define PRINTF_INT32_MODIFIER ""
#  endif
# elif (SHRT_MAX == INT32_MAX)
  typedef signed short int32_t;
#  define INT32_C(v) ((short) (v))
#  ifndef PRINTF_INT32_MODIFIER
#   define PRINTF_INT32_MODIFIER ""
#  endif
# else
#  error "Platform not supported"
# endif
#endif
/*
* The macro stdint_int64_defined is temporarily used to record
* whether or not 64 bit integer support is available. It must be
* defined for any 64 bit integer extensions for new platforms that are
* added. (It is #undef'ed again further down once the dependent
* definitions have been made.)
*/
#undef stdint_int64_defined
/* First choice: a C99 (or later) compiler, where long long is guaranteed. */
#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S)
# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined (S_SPLINT_S)
# define stdint_int64_defined
typedef long long int64_t;
typedef unsigned long long uint64_t;
# define UINT64_C(v) v ## ULL
# define INT64_C(v) v ## LL
# ifndef PRINTF_INT64_MODIFIER
# define PRINTF_INT64_MODIFIER "ll"
# endif
# endif
#endif
/* Otherwise fall back to per-compiler knowledge: long long or __int64. */
#if !defined (stdint_int64_defined)
# if defined(__GNUC__)
# define stdint_int64_defined
__extension__ typedef long long int64_t;
__extension__ typedef unsigned long long uint64_t;
# define UINT64_C(v) v ## ULL
# define INT64_C(v) v ## LL
# ifndef PRINTF_INT64_MODIFIER
# define PRINTF_INT64_MODIFIER "ll"
# endif
# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S)
# define stdint_int64_defined
typedef long long int64_t;
typedef unsigned long long uint64_t;
# define UINT64_C(v) v ## ULL
# define INT64_C(v) v ## LL
# ifndef PRINTF_INT64_MODIFIER
# define PRINTF_INT64_MODIFIER "ll"
# endif
# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC)
# define stdint_int64_defined
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
# define UINT64_C(v) v ## UI64
# define INT64_C(v) v ## I64
# ifndef PRINTF_INT64_MODIFIER
# define PRINTF_INT64_MODIFIER "I64"
# endif
# endif
#endif
#if !defined (LONG_LONG_MAX) && defined (INT64_C)
# define LONG_LONG_MAX INT64_C (9223372036854775807)
#endif
#ifndef ULONG_LONG_MAX
# define ULONG_LONG_MAX UINT64_C (18446744073709551615)
#endif
#if !defined (INT64_MAX) && defined (INT64_C)
# define INT64_MAX INT64_C (9223372036854775807)
#endif
#if !defined (INT64_MIN) && defined (INT64_C)
# define INT64_MIN INT64_C (-9223372036854775808)
#endif
#if !defined (UINT64_MAX) && defined (INT64_C)
# define UINT64_MAX UINT64_C (18446744073709551615)
#endif
/*
* Width of hexadecimal for number field.
* (The *_DEC_WIDTH macros below give the corresponding decimal widths.)
* All widths are string literals so they can be pasted into printf formats.
*/
#ifndef PRINTF_INT64_HEX_WIDTH
# define PRINTF_INT64_HEX_WIDTH "16"
#endif
#ifndef PRINTF_INT32_HEX_WIDTH
# define PRINTF_INT32_HEX_WIDTH "8"
#endif
#ifndef PRINTF_INT16_HEX_WIDTH
# define PRINTF_INT16_HEX_WIDTH "4"
#endif
#ifndef PRINTF_INT8_HEX_WIDTH
# define PRINTF_INT8_HEX_WIDTH "2"
#endif
#ifndef PRINTF_INT64_DEC_WIDTH
# define PRINTF_INT64_DEC_WIDTH "20"
#endif
#ifndef PRINTF_INT32_DEC_WIDTH
# define PRINTF_INT32_DEC_WIDTH "10"
#endif
#ifndef PRINTF_INT16_DEC_WIDTH
# define PRINTF_INT16_DEC_WIDTH "5"
#endif
#ifndef PRINTF_INT8_DEC_WIDTH
# define PRINTF_INT8_DEC_WIDTH "3"
#endif
/*
 * Ok, lets not worry about 128 bit integers for now. Moore's law says
 * we don't need to worry about that until about 2040 at which point
 * we'll have bigger things to worry about.
 *
 * (u)intmax_t is the 64 bit type when one was found above, otherwise the
 * 32 bit type. Both branches define the same set of limit, constant and
 * printf helper macros so that users do not need to care which one is
 * active.
 */
#ifdef stdint_int64_defined
  typedef int64_t intmax_t;
  typedef uint64_t uintmax_t;
# define INTMAX_MAX INT64_MAX
# define INTMAX_MIN INT64_MIN
# define UINTMAX_MAX UINT64_MAX
# define UINTMAX_C(v) UINT64_C(v)
# define INTMAX_C(v) INT64_C(v)
# ifndef PRINTF_INTMAX_MODIFIER
#  define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
# endif
# ifndef PRINTF_INTMAX_HEX_WIDTH
#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
# endif
# ifndef PRINTF_INTMAX_DEC_WIDTH
#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
# endif
#else
  typedef int32_t intmax_t;
  typedef uint32_t uintmax_t;
# define INTMAX_MAX INT32_MAX
/* INTMAX_MIN was missing from this branch although the 64 bit branch defines it. */
# define INTMAX_MIN INT32_MIN
# define UINTMAX_MAX UINT32_MAX
# define UINTMAX_C(v) UINT32_C(v)
# define INTMAX_C(v) INT32_C(v)
# ifndef PRINTF_INTMAX_MODIFIER
#  define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER
# endif
# ifndef PRINTF_INTMAX_HEX_WIDTH
#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH
# endif
# ifndef PRINTF_INTMAX_DEC_WIDTH
#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH
# endif
#endif
/*
* Because this file currently only supports platforms which have
* precise powers of 2 as bit sizes for the default integers, the
* least definitions are all trivial. It is possible that a future
* version of this file could have different definitions.
*
* Defining stdint_least_defined beforehand suppresses this whole section;
* the macro is #undef'ed again right after it.
*/
#ifndef stdint_least_defined
typedef int8_t int_least8_t;
typedef uint8_t uint_least8_t;
typedef int16_t int_least16_t;
typedef uint16_t uint_least16_t;
typedef int32_t int_least32_t;
typedef uint32_t uint_least32_t;
# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER
# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER
# define UINT_LEAST8_MAX UINT8_MAX
# define INT_LEAST8_MAX INT8_MAX
# define UINT_LEAST16_MAX UINT16_MAX
# define INT_LEAST16_MAX INT16_MAX
# define UINT_LEAST32_MAX UINT32_MAX
# define INT_LEAST32_MAX INT32_MAX
# define INT_LEAST8_MIN INT8_MIN
# define INT_LEAST16_MIN INT16_MIN
# define INT_LEAST32_MIN INT32_MIN
/* The 64 bit least types exist only when a 64 bit integer type was found above. */
# ifdef stdint_int64_defined
typedef int64_t int_least64_t;
typedef uint64_t uint_least64_t;
# define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER
# define UINT_LEAST64_MAX UINT64_MAX
# define INT_LEAST64_MAX INT64_MAX
# define INT_LEAST64_MIN INT64_MIN
# endif
#endif
#undef stdint_least_defined
/*
* The ANSI C committee pretending to know or specify anything about
* performance is the epitome of misguided arrogance. The mandate of
* this file is to *ONLY* ever support that absolute minimum
* definition of the fast integer types, for compatibility purposes.
* No extensions, and no attempt to suggest what may or may not be a
* faster integer type will ever be made in this file. Developers are
* warned to stay away from these types when using this or any other
* stdint.h.
*
* Concretely: every (u)int_fastN_t below is an alias of the matching
* (u)int_leastN_t, and every limit macro forwards to the LEAST one.
*/
typedef int_least8_t int_fast8_t;
typedef uint_least8_t uint_fast8_t;
typedef int_least16_t int_fast16_t;
typedef uint_least16_t uint_fast16_t;
typedef int_least32_t int_fast32_t;
typedef uint_least32_t uint_fast32_t;
#define UINT_FAST8_MAX UINT_LEAST8_MAX
#define INT_FAST8_MAX INT_LEAST8_MAX
#define UINT_FAST16_MAX UINT_LEAST16_MAX
#define INT_FAST16_MAX INT_LEAST16_MAX
#define UINT_FAST32_MAX UINT_LEAST32_MAX
#define INT_FAST32_MAX INT_LEAST32_MAX
#define INT_FAST8_MIN INT_LEAST8_MIN
#define INT_FAST16_MIN INT_LEAST16_MIN
#define INT_FAST32_MIN INT_LEAST32_MIN
#ifdef stdint_int64_defined
typedef int_least64_t int_fast64_t;
typedef uint_least64_t uint_fast64_t;
# define UINT_FAST64_MAX UINT_LEAST64_MAX
# define INT_FAST64_MAX INT_LEAST64_MAX
# define INT_FAST64_MIN INT_LEAST64_MIN
#endif
/* Last use of the temporary marker; remove it so it does not leak to includers. */
#undef stdint_int64_defined
/*
* Whatever piecemeal, per compiler thing we can do about the wchar_t
* type limits.
*
* Only attempted for Watcom, MSVC and GNU compilers: <wchar.h> is included
* and the limits are filled in only if that header did not define them.
* NOTE(review): the fallbacks (0 and (wchar_t)-1) are only correct limits
* when wchar_t is an unsigned type.
*/
#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__)
# include <wchar.h>
# ifndef WCHAR_MIN
# define WCHAR_MIN 0
# endif
# ifndef WCHAR_MAX
# define WCHAR_MAX ((wchar_t)-1)
# endif
#endif
/*
* Whatever piecemeal, per compiler/platform thing we can do about the
* (u)intptr_t types and limits.
*
* The pointer width is guessed from platform macros into stdint_intptr_bits;
* every pointer-related name is then built by token-pasting that number
* between a prefix and a suffix (e.g. INT + 64 + _MAX).
*/
/* MSVC headers may already have provided uintptr_t; if so, skip this section. */
#if defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED)
# define STDINT_H_UINTPTR_T_DEFINED
#endif
#ifndef STDINT_H_UINTPTR_T_DEFINED
# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64)
# define stdint_intptr_bits 64
# elif defined (__WATCOMC__) || defined (__TURBOC__)
# if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__)
# define stdint_intptr_bits 16
# else
# define stdint_intptr_bits 32
# endif
# elif defined (__i386__) || defined (_WIN32) || defined (WIN32)
# define stdint_intptr_bits 32
# elif defined (__INTEL_COMPILER)
/* TODO -- what did Intel do about x86-64? */
# endif
# ifdef stdint_intptr_bits
/* Two-level paste so that stdint_intptr_bits is expanded to its number before ## is applied. */
# define stdint_intptr_glue3_i(a,b,c) a##b##c
# define stdint_intptr_glue3(a,b,c) stdint_intptr_glue3_i(a,b,c)
# ifndef PRINTF_INTPTR_MODIFIER
# define PRINTF_INTPTR_MODIFIER stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER)
# endif
# ifndef PTRDIFF_MAX
# define PTRDIFF_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
# endif
# ifndef PTRDIFF_MIN
# define PTRDIFF_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
# endif
# ifndef UINTPTR_MAX
# define UINTPTR_MAX stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX)
# endif
# ifndef INTPTR_MAX
# define INTPTR_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
# endif
# ifndef INTPTR_MIN
# define INTPTR_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
# endif
# ifndef INTPTR_C
# define INTPTR_C(x) stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x)
# endif
# ifndef UINTPTR_C
# define UINTPTR_C(x) stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x)
# endif
typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t;
typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t) intptr_t;
# else
/* TODO -- This following is likely wrong for some platforms, and does
nothing for the definition of uintptr_t. */
typedef ptrdiff_t intptr_t;
# endif
/* Mark the section as done regardless of which branch was taken above. */
# define STDINT_H_UINTPTR_T_DEFINED
#endif
/*
 * Assumes sig_atomic_t is signed and we have a 2s complement machine.
 *
 * The maximum 2^(N-1) - 1 is built as ((2^(N-2) - 1) * 2) + 1 so that no
 * intermediate value exceeds the result. Computing (1 << (N-1)) - 1 directly
 * would shift into the sign bit and then subtract from the most negative
 * value, both of which overflow a signed integer.
 */
#ifndef SIG_ATOMIC_MAX
# define SIG_ATOMIC_MAX ((((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-2)) - 1) * 2) + 1)
#endif
#endif
#if defined (__TEST_PSTDINT_FOR_CORRECTNESS)
/*
 * Please compile with the maximum warning settings to make sure macros are not
 * defined more than once.
 *
 * Self-test program: declares one zero-initialised variable of every
 * exact-width type, prints each with its PRINTF_*_MODIFIER and compares the
 * text against the same print of a plain int, then checks the UINT*_MAX
 * limits against the bitwise complement of zero.
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*
 * Token pasting helpers. Only identifier fragments are ever pasted; operators
 * such as '=', '~' and '!=' are written outside the paste, because joining an
 * identifier to an operator does not form a valid preprocessing token and is
 * rejected by conforming preprocessors.
 */
#define glue2_aux(x,y) x ## y
#define glue2(x,y) glue2_aux(x,y)
#define glue3_aux(x,y,z) x ## y ## z
#define glue3(x,y,z) glue3_aux(x,y,z)
/* DECLU(8) -> uint8_t u8 = UINT8_C (0);   DECLI(8) -> int8_t i8 = INT8_C (0); */
#define DECLU(bits) glue3(uint,bits,_t) glue2(u,bits) = glue3(UINT,bits,_C) (0);
#define DECLI(bits) glue3(int,bits,_t) glue2(i,bits) = glue3(INT,bits,_C) (0);
#define DECL(us,bits) glue2(DECL,us) (bits)
/* TESTUMAX(8) -> u8 = ~u8; if (UINT8_MAX != u8) printf (...) */
#define TESTUMAX(bits) glue2(u,bits) = ~glue2(u,bits); if (glue3(UINT,bits,_MAX) != glue2(u,bits)) printf ("Something wrong with UINT%d_MAX\n", bits)
int main () {
    DECL(I,8)
    DECL(U,8)
    DECL(I,16)
    DECL(U,16)
    DECL(I,32)
    DECL(U,32)
#ifdef INT64_MAX
    DECL(I,64)
    DECL(U,64)
#endif
    intmax_t imax = INTMAX_C(0);
    uintmax_t umax = UINTMAX_C(0);
    char str0[256], str1[256];
    /* Reference output: a zero followed by all-ones, printed as plain int. */
    sprintf (str0, "%d %x\n", 0, ~0);
    sprintf (str1, "%d %x\n", i8, ~0);
    if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1);
    sprintf (str1, "%u %x\n", u8, ~0);
    if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1);
    sprintf (str1, "%d %x\n", i16, ~0);
    if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1);
    sprintf (str1, "%u %x\n", u16, ~0);
    if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1);
    sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n", i32, ~0);
    if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1);
    sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n", u32, ~0);
    if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1);
#ifdef INT64_MAX
    sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n", i64, ~0);
    if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1);
#endif
    sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n", imax, ~0);
    if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1);
    sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n", umax, ~0);
    if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1);
    TESTUMAX(8);
    TESTUMAX(16);
    TESTUMAX(32);
#ifdef INT64_MAX
    TESTUMAX(64);
#endif
    return EXIT_SUCCESS;
}
#endif

53
license.txt Normal file
View File

@ -0,0 +1,53 @@
Original HLSLcc source code Copyright (c) 2012 James Jones
Further improvements Copyright (c) 2014-2016 Unity Technologies
All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
This software makes use of the bstring library which is provided under the following license:
Copyright (c) 2002-2008 Paul Hsieh
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of bstrlib nor the names of its contributors may be used
to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

824
src/ControlFlowGraph.cpp Normal file
View File

@ -0,0 +1,824 @@
#include "internal_includes/debug.h"
#include "internal_includes/ControlFlowGraph.h"
#include "internal_includes/ControlFlowGraphUtils.h"
#include "internal_includes/Instruction.h"
#include "internal_includes/Operand.h"
#include "internal_includes/HLSLccToolkit.h"
#include <algorithm>
using namespace HLSLcc::ControlFlow;
using HLSLcc::ForEachOperand;
// Rebuilds the whole graph for the instruction stream that starts at firstInstruction
// and returns the root basic block. Any blocks from a previous call are discarded first.
const BasicBlock &ControlFlowGraph::Build(const Instruction *firstInstruction)
{
    m_BlockMap.clear();
    m_BlockStorage.clear();

    // The BasicBlock constructor registers the new block into m_BlockStorage itself,
    // so the graph owns it and no delete is needed here.
    const Instruction *entryPoint = Utils::GetNextNonLabelInstruction(firstInstruction);
    BasicBlock *root = new BasicBlock(entryPoint, *this, NULL);

    // Keep recomputing the reachable sets until a full pass over all blocks changes nothing.
    for (bool changed = true; changed;)
    {
        changed = false;
        for (const auto &block : m_BlockStorage)
        {
            if (block->RebuildReachable())
                changed = true;
        }
    }

    return *root;
}
// Const lookup of the block registered for an instruction. The instruction is first passed
// through Utils::GetNextNonLabelInstruction, which is the key form used by m_BlockMap.
// Returns NULL when no block is registered under that key.
const BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *instruction) const
{
    const Instruction *blockHead = Utils::GetNextNonLabelInstruction(instruction);
    BasicBlockMap::const_iterator found = m_BlockMap.find(blockHead);
    if (found != m_BlockMap.end())
        return found->second;
    return NULL;
}
// Mutable lookup of the block registered for an instruction. The instruction is first passed
// through Utils::GetNextNonLabelInstruction, which is the key form used by m_BlockMap.
// Returns NULL when no block is registered under that key.
BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *instruction)
{
    const Instruction *blockHead = Utils::GetNextNonLabelInstruction(instruction);
    BasicBlockMap::iterator found = m_BlockMap.find(blockHead);
    if (found != m_BlockMap.end())
        return found->second;
    return NULL;
}
// Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build().
// Auto-registers itself into ControlFlowGraph
//
// psFirst              - first instruction of the block; must already be label-free (asserted below).
// graph                - graph this block belongs to; the block adds itself to the graph's
//                        m_BlockStorage and m_BlockMap.
// psPrecedingBlockHead - first instruction of the predecessor block, or NULL when there is none
//                        (ControlFlowGraph::Build() passes NULL for the root block).
//
// The object must be allocated with new: the constructor hands `this` to a shared_ptr stored in
// the graph, which then owns the block.
BasicBlock::BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead)
: m_Graph(graph)
, m_First(psFirst)
, m_Last(NULL)
{
// Start from empty analysis sets.
m_UEVar.clear();
m_VarKill.clear();
m_Preceding.clear();
m_Succeeding.clear();
m_DEDef.clear();
m_Reachable.clear();
// Check that we've pruned the labels
ASSERT(psFirst == Utils::GetNextNonLabelInstruction(psFirst));
// Insert to block storage, block map and connect to previous block
// The shared_ptr takes ownership of this object; registration happens before Build() so that
// lookups by head instruction already find this block while it is being built.
m_Graph.m_BlockStorage.push_back(shared_ptr<BasicBlock>(this));
// The insert itself is outside the ASSERT, so it is performed even if ASSERT compiles to nothing.
bool didInsert = m_Graph.m_BlockMap.insert(std::make_pair(psFirst, this)).second;
ASSERT(didInsert);
if (psPrecedingBlockHead != NULL)
{
// Link both directions: predecessor head into our m_Preceding, our head into its m_Succeeding.
m_Preceding.insert(psPrecedingBlockHead);
BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(psPrecedingBlockHead);
ASSERT(prec != 0);
didInsert = prec->m_Succeeding.insert(psFirst).second;
ASSERT(didInsert);
}
// Scan the instructions of this block, starting at m_First.
Build();
}
// Scans the instruction stream starting at m_First until an opcode that ends the basic block is found.
// While scanning, every accessed temp register component (indexed as register * 4 + component) is classified:
//  - m_UEVar:   components read before they are written in this block (upward-exposed uses)
//  - m_VarKill: components written somewhere in this block
//  - m_DEDef:   for each written component, the last definition made in this block
// The terminating opcode decides which successor blocks are requested through AddChildBasicBlock().
// Creating a new block runs its constructor, which calls Build() again, so successors that do not
// exist yet are built before this call returns.
// On exit m_Last points at the terminating instruction and m_Reachable is seeded with m_DEDef.
void BasicBlock::Build()
{
const Instruction *inst = m_First;
while (1)
{
// Process sources first
// (a read that happens in the same instruction as a write must see the state from before the write)
ForEachOperand(inst, inst+1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND,
[this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
{
// Only temp registers are tracked
if (psOperand->eType != OPERAND_TYPE_TEMP)
return;
uint32_t tempReg = psOperand->ui32RegisterNumber;
uint32_t accessMask = psOperand->GetAccessMask();
// Go through each component
for (int k = 0; k < 4; k++)
{
if (!(accessMask & (1 << k)))
continue;
uint32_t regIdx = tempReg * 4 + k;
// Is this idx already in the kill set, meaning that it's already been re-defined in this basic block? Ignore
if (m_VarKill.find(regIdx) != m_VarKill.end())
continue;
// Add to UEVars set. Doesn't matter if it's already there.
m_UEVar.insert(regIdx);
}
return;
});
// Then the destination operands
ForEachOperand(inst, inst+1, FEO_FLAG_DEST_OPERAND,
[this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
{
// Only temp registers are tracked
if (psOperand->eType != OPERAND_TYPE_TEMP)
return;
uint32_t tempReg = psOperand->ui32RegisterNumber;
uint32_t accessMask = psOperand->GetAccessMask();
// Go through each component
for (int k = 0; k < 4; k++)
{
if (!(accessMask & (1 << k)))
continue;
uint32_t regIdx = tempReg * 4 + k;
// Add to kill set. Dupes are fine, this is a set.
m_VarKill.insert(regIdx);
// Also into the downward definitions. Overwrite the previous definition in this basic block, if any
Definition d(psInst, psOperand);
m_DEDef[regIdx].clear();
m_DEDef[regIdx].insert(d);
}
return;
});
// Check for flow control instructions
bool blockDone = false;
switch (inst->eOpcode)
{
default:
// Any other opcode keeps the block going.
// NOTE(review): OPCODE_DEFAULT is not listed below although OPCODE_CASE is, so a DEFAULT label
// reached while scanning does not end the block - confirm this is intended.
break;
case OPCODE_RET:
// Ends the block without requesting any successor.
blockDone = true;
break;
case OPCODE_RETC:
// Basic block is done, start a next one.
// There REALLY should be no existing blocks for this one
// Only the fall-through successor is added here.
ASSERT(m_Graph.GetBasicBlockForInstruction(Utils::GetNextNonLabelInstruction(inst+1)) == NULL);
AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1));
blockDone = true;
break;
case OPCODE_LOOP:
case OPCODE_CASE:
case OPCODE_ENDIF:
case OPCODE_ENDSWITCH:
// Not a flow control branch, but need to start a new block anyway.
AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1));
blockDone = true;
break;
// Branches
case OPCODE_IF:
case OPCODE_BREAKC:
case OPCODE_CONTINUEC:
{
const Instruction *jumpPoint = Utils::GetJumpPoint(inst);
ASSERT(jumpPoint != NULL);
// The control branches to the next instruction or jumps to jumpPoint
AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst+1));
AddChildBasicBlock(jumpPoint);
blockDone = true;
break;
}
case OPCODE_SWITCH:
{
// Walk the switch body from label group to label group, requesting a successor for each group.
// sawEndSwitch ends the walk; a target found together with sawEndSwitch is only connected
// when needConnectToParent was also reported by GetJumpPoint.
bool sawEndSwitch = false;
bool needConnectToParent = false;
const Instruction *jumpPoint = Utils::GetJumpPoint(inst, &sawEndSwitch, &needConnectToParent);
ASSERT(jumpPoint != NULL);
while (1)
{
if(!sawEndSwitch || needConnectToParent)
AddChildBasicBlock(jumpPoint);
if (sawEndSwitch)
break;
// The -1 is a bit of a hack: we always scroll past all labels so rewind to the last one so we'll know to search for the next label
ASSERT((jumpPoint - 1)->eOpcode == OPCODE_CASE || (jumpPoint - 1)->eOpcode == OPCODE_DEFAULT);
jumpPoint = Utils::GetJumpPoint(jumpPoint-1, &sawEndSwitch, &needConnectToParent);
ASSERT(jumpPoint != NULL);
}
blockDone = true;
break;
}
// Non-conditional jumps
case OPCODE_BREAK:
case OPCODE_ELSE:
case OPCODE_CONTINUE:
case OPCODE_ENDLOOP:
{
// Exactly one successor: the jump target.
const Instruction *jumpPoint = Utils::GetJumpPoint(inst);
ASSERT(jumpPoint != NULL);
AddChildBasicBlock(jumpPoint);
blockDone = true;
break;
}
}
if (blockDone)
break;
inst++;
}
// In initial building phase, just make m_Reachable equal to m_DEDef
m_Reachable = m_DEDef;
// Tag the end of the basic block
m_Last = inst;
// printf("Basic Block %d -> %d\n", (int)m_First->id, (int)m_Last->id);
}
// Makes the block that begins at psFirst a successor of this block, creating it on demand.
// Returns the successor block (either the one already known to the graph or the newly created one).
BasicBlock * BasicBlock::AddChildBasicBlock(const Instruction *psFirst)
{
    BasicBlock *existing = m_Graph.GetBasicBlockForInstruction(psFirst);
    if (existing == NULL)
    {
        // Nothing starts at psFirst yet. The constructor registers the new block in the graph
        // and links it with this block (identified by m_First), so no extra wiring is needed here.
        return new BasicBlock(psFirst, m_Graph, m_First);
    }

    // The target was built earlier: only the edge between the two blocks has to be recorded, in both directions.
    m_Succeeding.insert(psFirst);
    existing->m_Preceding.insert(m_First);
    return existing;
}
bool BasicBlock::RebuildReachable()
{
// Building the Reachable set is an iterative process, where each block gets rebuilt until nothing changes.
// Formula: reachable = this.DEDef union ( each preceding.Reachable() minus this.VarKill())
ReachableVariables newReachable = m_DEDef;
bool hasChanges = false;
// Loop each predecessor
std::for_each(Preceding().begin(), Preceding().end(), [&](const Instruction *instr)
{
const BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(instr);
const ReachableVariables &precReachable = prec->Reachable();
// Loop each variable*component
std::for_each(precReachable.begin(), precReachable.end(), [&](const std::pair<uint32_t, BasicBlock::ReachableDefinitionsPerVariable> &itr2)
{
uint32_t regIdx = itr2.first;
const BasicBlock::ReachableDefinitionsPerVariable &defs = itr2.second;
// Already killed in this block?
if (VarKill().find(regIdx) != VarKill().end())
return;
// Only do comparisons against current definitions if we've yet to find any changes
BasicBlock::ReachableDefinitionsPerVariable *currReachablePerVar = 0;
if (!hasChanges)
currReachablePerVar = &m_Reachable[regIdx];
BasicBlock::ReachableDefinitionsPerVariable &newReachablePerVar = newReachable[regIdx];
// Loop each definition
std::for_each(defs.begin(), defs.end(), [&](const BasicBlock::Definition &d)
{
if (!hasChanges)
{
// Check if already there
if (currReachablePerVar->find(d) == currReachablePerVar->end())
hasChanges = true;
}
newReachablePerVar.insert(d);
}); // definition
}); // variable*component
}); // predecessor
if (hasChanges)
{
std::swap(m_Reachable, newReachable);
}
return hasChanges;
}
// Merges b into a: afterwards every variable*component present in b is present in a, and its
// definition set in a additionally contains all definitions listed for it in b.
// Entries that exist only in a are left untouched.
void BasicBlock::RVarUnion(ReachableVariables &a, const ReachableVariables &b)
{
    // const auto& references each entry in place; naming the element type as
    // std::pair<uint32_t, ...> would not match the container's pair<const uint32_t, ...>
    // and would copy the whole definition set for every entry.
    for (const auto &entry : b)
    {
        // operator[] creates an empty set for a variable that a has not seen yet, so one range
        // insert covers both the "new variable" and the "merge into existing" case with a single lookup.
        a[entry.first].insert(entry.second.begin(), entry.second.end());
    }
}
#if ENABLE_UNIT_TESTS
#define UNITY_EXTERNAL_TOOL 1
#include "Testing.h" // From Runtime/Testing
UNIT_TEST_SUITE(HLSLccTests)
{
// Smallest possible graph: one MOV followed by RET.
// Expects a single block spanning both instructions, with no neighbours, that kills all four
// components of t0 and records the MOV as the downward-exposed definition of each of them.
TEST(ControlFlowGraph_Build_Simple_Works)
{
Instruction inst[] =
{
// MOV t0.xyzw, I0.xyzw
Instruction(0, OPCODE_MOV, 0, 0xf, 0xffffffff, 0xf),
Instruction(1, OPCODE_RET)
};
ControlFlowGraph cfg;
const BasicBlock &root = cfg.Build(inst);
// The block covers the whole program and has no edges
CHECK_EQUAL(&inst[0], root.First());
CHECK_EQUAL(&inst[1], root.Last());
CHECK(root.Preceding().empty());
CHECK(root.Succeeding().empty());
CHECK_EQUAL(4, root.VarKill().size());
// Check that all components from t0 are killed
CHECK_EQUAL(1, root.VarKill().count(0));
CHECK_EQUAL(1, root.VarKill().count(1));
CHECK_EQUAL(1, root.VarKill().count(2));
CHECK_EQUAL(1, root.VarKill().count(3));
// Each component's definition must point at the MOV and its destination operand
CHECK_EQUAL(&inst[0], root.DEDef().find(0)->second.begin()->m_Instruction);
CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(0)->second.begin()->m_Operand);
CHECK_EQUAL(&inst[0], root.DEDef().find(1)->second.begin()->m_Instruction);
CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(1)->second.begin()->m_Operand);
CHECK_EQUAL(&inst[0], root.DEDef().find(2)->second.begin()->m_Instruction);
CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(2)->second.begin()->m_Operand);
CHECK_EQUAL(&inst[0], root.DEDef().find(3)->second.begin()->m_Instruction);
CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(3)->second.begin()->m_Operand);
}
// IF / ELSE / ENDIF program.
// Expects four blocks (B0 up to the IF, B1 the if-body, B2 the else-body, B3 after ENDIF),
// checks their instruction ranges and edges, the upward-exposed variables of the two branches,
// the kill set of B0, and that all blocks end up with the same reachable definitions as B0.
TEST(ControlFlowGraph_Build_If_Works)
{
Instruction inst[] =
{
// B0
// 0: MOV t1.xyzw, i0.xyzw
Instruction(0, OPCODE_MOV, 1, 0xf, 0xffffffff, 0xf),
// 1: MUL t0, t1, t1
Instruction(1, OPCODE_MUL, 0, 0xf, 1, 0xf, 1, 0xf),
// 2: IF t1.y
Instruction(2, OPCODE_IF, 1, 2),
// B1
// 3: MOV o0, t0
Instruction(3, OPCODE_MOV, 0xffffffff, 0xf, 0, 0xf),
// 4: ELSE
Instruction(4, OPCODE_ELSE),
// B2
// 5: MOV o0, t1
Instruction(5, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf),
// 6: ENDIF
Instruction(6, OPCODE_ENDIF),
// B3
// 7: NOP
Instruction(7, OPCODE_NOP),
// 8: RET
Instruction(8, OPCODE_RET)
};
ControlFlowGraph cfg;
const BasicBlock &root = cfg.Build(inst);
CHECK_EQUAL(root.First(), &inst[0]);
CHECK_EQUAL(root.Last(), &inst[2]);
CHECK(root.Preceding().empty());
const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]);
const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]);
const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[7]);
CHECK(b1 != NULL);
CHECK(b2 != NULL);
CHECK(b3 != NULL);
// Check instruction ranges
CHECK_EQUAL(&inst[3], b1->First());
CHECK_EQUAL(&inst[5], b2->First());
CHECK_EQUAL(&inst[7], b3->First());
CHECK_EQUAL(&inst[4], b1->Last());
CHECK_EQUAL(&inst[6], b2->Last());
CHECK_EQUAL(&inst[8], b3->Last());
// Edges: B0->B1, B0->B2, B1->B3, B2->B3 (blocks are identified by their first instruction)
CHECK_EQUAL(1, root.Succeeding().count(&inst[3]));
CHECK_EQUAL(1, root.Succeeding().count(&inst[5]));
CHECK_EQUAL(2, root.Succeeding().size());
CHECK_EQUAL(1, b1->Preceding().size());
CHECK_EQUAL(1, b1->Preceding().count(&inst[0]));
CHECK_EQUAL(1, b2->Preceding().size());
CHECK_EQUAL(1, b2->Preceding().count(&inst[0]));
CHECK_EQUAL(2, b3->Preceding().size());
CHECK_EQUAL(0, b3->Preceding().count(&inst[0]));
CHECK_EQUAL(1, b3->Preceding().count(&inst[3]));
CHECK_EQUAL(1, b3->Preceding().count(&inst[5]));
// The if block must have upwards-exposed t0
CHECK_EQUAL(1, b1->UEVar().count(0));
CHECK_EQUAL(1, b1->UEVar().count(1));
CHECK_EQUAL(1, b1->UEVar().count(2));
CHECK_EQUAL(1, b1->UEVar().count(3));
// The else block must have upwards-exposed t1
CHECK_EQUAL(1, b2->UEVar().count(4));
CHECK_EQUAL(1, b2->UEVar().count(5));
CHECK_EQUAL(1, b2->UEVar().count(6));
CHECK_EQUAL(1, b2->UEVar().count(7));
CHECK_EQUAL(8, root.VarKill().size());
// Check that all components from t0 and t1 are killed
CHECK_EQUAL(1, root.VarKill().count(0));
CHECK_EQUAL(1, root.VarKill().count(1));
CHECK_EQUAL(1, root.VarKill().count(2));
CHECK_EQUAL(1, root.VarKill().count(3));
CHECK_EQUAL(1, root.VarKill().count(4));
CHECK_EQUAL(1, root.VarKill().count(5));
CHECK_EQUAL(1, root.VarKill().count(6));
CHECK_EQUAL(1, root.VarKill().count(7));
// The expected downwards-exposed definitions:
// B0: t0, t1
// B1-B3: none
CHECK_EQUAL(8, root.DEDef().size());
CHECK_EQUAL(0, b1->DEDef().size());
CHECK_EQUAL(0, b2->DEDef().size());
CHECK_EQUAL(0, b3->DEDef().size());
// Nothing is redefined after B0, so every block sees exactly B0's definitions
CHECK(root.DEDef()==root.Reachable());
CHECK(root.Reachable()==b1->Reachable());
CHECK(root.Reachable()==b2->Reachable());
CHECK(root.Reachable()==b3->Reachable());
}
// SWITCH program with stacked labels, a fall-through case, an unconditional BREAK and a conditional BREAKC.
// Expects six blocks (B0 up to the SWITCH, B1..B4 the case bodies, B5 after ENDSWITCH), checks the
// instruction ranges, every edge in both directions, and the reachable definitions that arrive at B5.
TEST(ControlFlowGraph_Build_SwitchCase_Works)
{
Instruction inst[] =
{
// Start B0
// i0: MOV t0.x, I0.x
Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1),
// i1: MOV t1.xyz, I0.yzw
Instruction(1, OPCODE_MOV, 1, 7, 0xffffffff, 0xe),
// i2: MOV t1.w, t0.x
// NOTE(review): the source register below is 0xffffffff, which the other comments here label I0, not t0 - confirm.
Instruction(2, OPCODE_MOV, 1, 8, 0xffffffff, 0x1),
// i3: MOV t2, I0
Instruction(3, OPCODE_MOV, 2, 0xf, 0xffffffff, 0xf),
// i4: SWITCH t0.y
// NOTE(review): the operand below is register 1 with mask 2, which by the pattern of the other instructions reads as t1.y - confirm.
Instruction(4, OPCODE_SWITCH, 1, 2),
// End B0
// i5: CASE
Instruction(5, OPCODE_CASE),
// i6: DEFAULT
Instruction(6, OPCODE_DEFAULT),
// Start B1
// i7: MOV t1.z, t0.x
Instruction(7, OPCODE_MOV, 1, 4, 0, 1),
// i8: CASE
Instruction(8, OPCODE_CASE),
// End B1
// Start B2
// i9: MOV t1.z, t2.x
Instruction(9, OPCODE_MOV, 1, 4, 2, 1),
// i10: BREAK
Instruction(10, OPCODE_BREAK),
// End B2
// i11: CASE
Instruction(11, OPCODE_CASE),
// Start B3
// i12: MOV t1.z, t2.y
Instruction(12, OPCODE_MOV, 1, 4, 2, 2),
// i13: BREAKC t0.x
Instruction(13, OPCODE_BREAKC, 0, 1),
// End B3
// i14: CASE
Instruction(14, OPCODE_CASE),
// Start B4
// i15: MOV t1.z, t2.z
Instruction(15, OPCODE_MOV, 1, 4, 2, 4),
// i16: ENDSWITCH
Instruction(16, OPCODE_ENDSWITCH),
// End B4
// Start B5
// i17: MOV o0, t1
Instruction(17, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf),
// i18: RET
Instruction(18, OPCODE_RET)
// End B5
};
ControlFlowGraph cfg;
const BasicBlock &root = cfg.Build(inst);
CHECK_EQUAL(&inst[0], root.First());
CHECK_EQUAL(&inst[4], root.Last());
const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[7]);
const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[9]);
const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[12]);
const BasicBlock *b4 = cfg.GetBasicBlockForInstruction(&inst[15]);
const BasicBlock *b5 = cfg.GetBasicBlockForInstruction(&inst[17]);
CHECK(b1 != NULL);
CHECK(b2 != NULL);
CHECK(b3 != NULL);
CHECK(b4 != NULL);
CHECK(b5 != NULL);
// Check instruction ranges
CHECK_EQUAL(&inst[8], b1->Last());
CHECK_EQUAL(&inst[10], b2->Last());
CHECK_EQUAL(&inst[13], b3->Last());
CHECK_EQUAL(&inst[16], b4->Last());
CHECK_EQUAL(&inst[18], b5->Last());
// Nothing before the root, nothing after b5
CHECK(root.Preceding().empty());
CHECK(b5->Succeeding().empty());
// Check that all connections are there and no others.
// B0->B1
// B0->B2
// B0->B3
// B0->B4
CHECK_EQUAL(1, root.Succeeding().count(&inst[7]));
CHECK_EQUAL(1, root.Succeeding().count(&inst[9]));
CHECK_EQUAL(1, root.Succeeding().count(&inst[12]));
CHECK_EQUAL(1, root.Succeeding().count(&inst[15]));
CHECK_EQUAL(4, root.Succeeding().size());
// B1
// B1->B2
CHECK_EQUAL(1, b1->Succeeding().count(&inst[9]));
CHECK_EQUAL(1, b1->Succeeding().size());
// B0->B1, reverse
CHECK_EQUAL(1, b1->Preceding().count(&inst[0]));
CHECK_EQUAL(1, b1->Preceding().size());
// B2
// B2->B5
CHECK_EQUAL(1, b2->Succeeding().count(&inst[17]));
CHECK_EQUAL(1, b2->Succeeding().size());
// Reached from B1 (fall-through) and from B0 (switch dispatch)
CHECK_EQUAL(1, b2->Preceding().count(&inst[7]));
CHECK_EQUAL(1, b2->Preceding().count(&inst[0]));
CHECK_EQUAL(2, b2->Preceding().size());
// B3
// B3->B4
// B3->B5
CHECK_EQUAL(1, b3->Succeeding().count(&inst[15]));
CHECK_EQUAL(1, b3->Succeeding().count(&inst[17]));
CHECK_EQUAL(2, b3->Succeeding().size());
CHECK_EQUAL(1, b3->Preceding().count(&inst[0]));
CHECK_EQUAL(1, b3->Preceding().size());
// B4
CHECK_EQUAL(1, b4->Succeeding().count(&inst[17]));
CHECK_EQUAL(1, b4->Succeeding().size());
CHECK_EQUAL(1, b4->Preceding().count(&inst[0]));
CHECK_EQUAL(2, b4->Preceding().size());
// B5
CHECK_EQUAL(0, b5->Succeeding().size());
CHECK_EQUAL(3, b5->Preceding().size()); //b2, b3, b4
CHECK_EQUAL(1, b5->Preceding().count(&inst[9]));
CHECK_EQUAL(1, b5->Preceding().count(&inst[12]));
CHECK_EQUAL(1, b5->Preceding().count(&inst[15]));
// Verify reachable sets
CHECK(root.Reachable() == root.DEDef());
CHECK_EQUAL(9, root.Reachable().size());
// B5 should have these reachables:
// t0.x only from b0
// t1.xy from b0, i1
// t1.z from b2,i9 + b3,i12 + b4,i15 (the defs from b0 and b1 are killed by b2)
// t1.w from b0, i2
// t2.xyzw from b0, i3
// Cast away const so [] works.
BasicBlock::ReachableVariables &r = (BasicBlock::ReachableVariables &)b5->Reachable();
// The size check has to come before the [] lookups: indexing a missing entry inserts an empty one.
CHECK_EQUAL(9, r.size());
CHECK_EQUAL(1, r[0].size());
CHECK_EQUAL(0, r[1].size());
CHECK_EQUAL(0, r[2].size());
CHECK_EQUAL(0, r[3].size());
CHECK_EQUAL(&inst[0], r[0].begin()->m_Instruction);
CHECK_EQUAL(1, r[4].size());
CHECK_EQUAL(1, r[5].size());
CHECK_EQUAL(3, r[6].size());
CHECK_EQUAL(1, r[7].size());
// t1.z (index 6) must carry exactly the three definitions from i9, i12 and i15
const BasicBlock::ReachableDefinitionsPerVariable &d = r[6];
BasicBlock::ReachableDefinitionsPerVariable t;
t.insert(BasicBlock::Definition(&inst[9], &inst[9].asOperands[0]));
t.insert(BasicBlock::Definition(&inst[12], &inst[12].asOperands[0]));
t.insert(BasicBlock::Definition(&inst[15], &inst[15].asOperands[0]));
CHECK(t == d);
CHECK_EQUAL(1, r[8].size());
CHECK_EQUAL(1, r[9].size());
CHECK_EQUAL(1, r[10].size());
CHECK_EQUAL(1, r[11].size());
}
// LOOP / BREAKC / ENDLOOP program.
// Expects four blocks (B0 up to LOOP, B1 loop head up to BREAKC, B2 rest of the loop body up to ENDLOOP,
// B3 after the loop), checks the edges including the back edge B2->B1, and compares the
// downward-exposed and reachable definition sets of every block against hand-built expectations.
TEST(ControlFlowGraph_Build_Loop_Works)
{
Instruction inst[] =
{
// Start B0
// i0: MOV t0.x, I0.x
Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1),
// i1: MOV t1.xy, I0.zw // The .x definition should not make it past the loop, .y should.
Instruction(1, OPCODE_MOV, 1, 3, 0xffffffff, 0xc),
// i2: LOOP
Instruction(2, OPCODE_LOOP, 1, 2),
// End B0 -> B1
// Begin B1
// i3: MOV t1.x, t0.x
Instruction(3, OPCODE_MOV, 1, 1, 0, 1),
// i4: BREAKC t0.x
Instruction(4, OPCODE_BREAKC, 0, 1),
// End B1 -> B2, B3
// Begin B2
// i5: ADD t0.x, t0.y
Instruction(5, OPCODE_ADD, 0, 1, 0, 2),
// i6: MOV t1.x, t0.x // This should never show up as definition
Instruction(6, OPCODE_MOV, 1, 1, 0, 1),
// i7: ENDLOOP
Instruction(7, OPCODE_ENDLOOP),
// End B2 -> B1
// Start B3
// i8: MOV O0.x, t1.x
Instruction(8, OPCODE_MOV, 0xffffffff, 1, 1, 1),
// i9: RET
Instruction(9, OPCODE_RET),
// End B3
};
ControlFlowGraph cfg;
const BasicBlock &root = cfg.Build(inst);
CHECK_EQUAL(&inst[0], root.First());
CHECK_EQUAL(&inst[2], root.Last());
const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]);
const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]);
const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[8]);
CHECK(b1 != NULL);
CHECK(b2 != NULL);
CHECK(b3 != NULL);
// Check instruction ranges
CHECK_EQUAL(&inst[4], b1->Last());
CHECK_EQUAL(&inst[7], b2->Last());
CHECK_EQUAL(&inst[9], b3->Last());
// Nothing before the root, nothing after b3
CHECK(root.Preceding().empty());
CHECK(b3->Succeeding().empty());
// Check that all connections are there and no others.
// B0->B1
CHECK_EQUAL(1, root.Succeeding().count(&inst[3]));
CHECK_EQUAL(1, root.Succeeding().size());
// B1
// B1->B2
// B1->B3
CHECK_EQUAL(1, b1->Succeeding().count(&inst[5]));
CHECK_EQUAL(1, b1->Succeeding().count(&inst[8]));
CHECK_EQUAL(2, b1->Succeeding().size());
// B0->B1, reverse
CHECK_EQUAL(1, b1->Preceding().count(&inst[0]));
// We may also come from B2
CHECK_EQUAL(1, b1->Preceding().count(&inst[5]));
CHECK_EQUAL(2, b1->Preceding().size());
// B2
// B2->B1
CHECK_EQUAL(1, b2->Succeeding().count(&inst[3]));
CHECK_EQUAL(1, b2->Succeeding().size());
CHECK_EQUAL(1, b2->Preceding().count(&inst[3]));
CHECK_EQUAL(1, b2->Preceding().size());
// B3
CHECK_EQUAL(1, b3->Preceding().count(&inst[3]));
CHECK_EQUAL(1, b3->Preceding().size());
// Verify reachable sets
// Keys below are register * 4 + component: 0 = t0.x, 4 = t1.x, 5 = t1.y
BasicBlock::ReachableVariables t;
// B0 DEDef and Reachable
t.clear();
t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0]));
t[4].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0]));
t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0]));
CHECK(root.DEDef() == t);
CHECK(root.Reachable() == root.DEDef());
// B1 DEDef and Reachable
t.clear();
t[4].insert(BasicBlock::Definition(&inst[3], &inst[3].asOperands[0]));
CHECK(b1->DEDef() == t);
t = b1->DEDef();
// t0.x from i0, t1.y (but not .x) from i1
t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0]));
t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0]));
// t0.x from i5, but nothing from i6
t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0]));
CHECK(b1->Reachable() == t);
// B2
t.clear();
t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0]));
t[4].insert(BasicBlock::Definition(&inst[6], &inst[6].asOperands[0]));
CHECK(b2->DEDef() == t);
t = b2->DEDef();
// Only t1.y from i1 survives into B2 besides its own definitions
t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0]));
CHECK(b2->Reachable() == t);
// B3
t.clear();
CHECK(b3->DEDef() == t);
// t0.x from i0, t1.y from i1
t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0]));
t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0]));
// t1.x from i3
t[4].insert(BasicBlock::Definition(&inst[3], &inst[3].asOperands[0]));
// t0.x from i5
t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0]));
CHECK(b3->Reachable() == t);
}
}
#endif

View File

@ -0,0 +1,121 @@
#include "ControlFlowGraphUtils.h"
#include "internal_includes/debug.h"
#include "internal_includes/Instruction.h"
#include "internal_includes/Operand.h"
// Starting at psStart, steps forward over any run of label-like opcodes
// (CASE, DEFAULT, ENDSWITCH, LOOP) and returns the first instruction that is none of them.
// psStart itself is returned when it is not a label.
// If an ENDSWITCH is stepped over and sawEndSwitch is non-null, *sawEndSwitch is set to true;
// the flag is never cleared here. Stepping over an ENDSWITCH with a null sawEndSwitch asserts,
// because the caller was not expecting one.
const Instruction *HLSLcc::ControlFlow::Utils::GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch /*= 0*/)
{
    const Instruction *cursor = psStart;
    for (;;)
    {
        switch (cursor->eOpcode)
        {
            case OPCODE_ENDSWITCH:
                // We really shouldn't be seeing ENDSWITCH unless the caller asked to be told about it
                ASSERT(sawEndSwitch != NULL);
                if (sawEndSwitch != NULL)
                    *sawEndSwitch = true;
                // fall through: ENDSWITCH is skipped like any other label
            case OPCODE_CASE:
            case OPCODE_DEFAULT:
            case OPCODE_LOOP:
                ++cursor;
                continue;
            default:
                return cursor;
        }
    }
}
// Finds where control continues for the flow-control instruction psStart.
// The result is always the first non-label instruction of the target block
// (see GetNextNonLabelInstruction), found by a nesting-aware linear scan:
//   IF / ELSE            -> forward to the matching ELSE or ENDIF, then + 1
//   BREAK / BREAKC       -> forward to the enclosing ENDLOOP or ENDSWITCH, then + 1
//   CONTINUE / CONTINUEC -> backward to the enclosing LOOP, then + 1
//   ENDLOOP              -> backward to the matching LOOP, then + 1
//   SWITCH / CASE / DEFAULT -> forward to the next CASE, DEFAULT or ENDSWITCH of the same switch, then + 1
// Constructs nested between psStart and the target are skipped by counting their open/close opcodes.
//
// sawEndSwitch (optional) is handed to the label skipper for every opcode except the switch family;
// for SWITCH/CASE/DEFAULT it is set to true only when the scan itself stopped at the ENDSWITCH.
// needConnectToParent (optional) is used by the switch family only: it is handed to the label skipper,
// which sets it when an ENDSWITCH is stepped over while skipping labels behind the stop point.
// Any other opcode asserts and yields 0.
const Instruction * HLSLcc::ControlFlow::Utils::GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch /*= 0*/, bool *needConnectToParent /* = 0*/)
{
    const OPCODE_TYPE op = psStart->eOpcode;
    ASSERT(op == OPCODE_IF || op == OPCODE_ELSE || op == OPCODE_BREAK || op == OPCODE_BREAKC
        || op == OPCODE_SWITCH || op == OPCODE_CASE || op == OPCODE_DEFAULT
        || op == OPCODE_ENDLOOP || op == OPCODE_CONTINUE || op == OPCODE_CONTINUEC);

    const Instruction *cursor = psStart;
    // Number of nested constructs currently open between psStart and cursor
    int nesting = 0;

    switch (op)
    {
        case OPCODE_IF:
        case OPCODE_ELSE:
            for (++cursor;; ++cursor)
            {
                const OPCODE_TYPE cur = cursor->eOpcode;
                if (nesting == 0 && (cur == OPCODE_ELSE || cur == OPCODE_ENDIF))
                    return GetNextNonLabelInstruction(cursor + 1, sawEndSwitch);

                if (cur == OPCODE_IF)
                    ++nesting;
                else if (cur == OPCODE_ENDIF)
                    --nesting;
            }

        case OPCODE_BREAK:
        case OPCODE_BREAKC:
            for (++cursor;; ++cursor)
            {
                const OPCODE_TYPE cur = cursor->eOpcode;
                const bool closes = (cur == OPCODE_ENDLOOP || cur == OPCODE_ENDSWITCH);
                if (closes && nesting == 0)
                    return GetNextNonLabelInstruction(cursor + 1, sawEndSwitch);

                if (cur == OPCODE_SWITCH || cur == OPCODE_LOOP)
                    ++nesting;
                else if (closes)
                    --nesting;
            }

        case OPCODE_CONTINUE:
        case OPCODE_CONTINUEC:
        case OPCODE_ENDLOOP:
            // Backward scan: an ENDLOOP opens a nested loop, a LOOP closes it
            for (--cursor;; --cursor)
            {
                const OPCODE_TYPE cur = cursor->eOpcode;
                if (cur == OPCODE_LOOP)
                {
                    if (nesting == 0)
                        return GetNextNonLabelInstruction(cursor + 1, sawEndSwitch);
                    --nesting;
                }
                else if (cur == OPCODE_ENDLOOP)
                {
                    ++nesting;
                }
            }

        case OPCODE_SWITCH:
        case OPCODE_CASE:
        case OPCODE_DEFAULT:
            for (++cursor;; ++cursor)
            {
                const OPCODE_TYPE cur = cursor->eOpcode;
                if (nesting == 0 && (cur == OPCODE_CASE || cur == OPCODE_DEFAULT || cur == OPCODE_ENDSWITCH))
                {
                    // sawEndSwitch only reports that the scan stopped at the ENDSWITCH itself.
                    // An ENDSWITCH met while skipping labels after a CASE/DEFAULT is reported through
                    // needConnectToParent instead, so that BasicBlock::Build can tell the two apart.
                    if (cur == OPCODE_ENDSWITCH && sawEndSwitch != 0)
                        *sawEndSwitch = true;
                    return GetNextNonLabelInstruction(cursor + 1, needConnectToParent);
                }

                if (cur == OPCODE_SWITCH)
                    ++nesting;
                else if (cur == OPCODE_ENDSWITCH)
                    --nesting;
            }

        default:
            ASSERT(0);
            break;
    }
    return 0;
}

769
src/DataTypeAnalysis.cpp Normal file
View File

@ -0,0 +1,769 @@
#include "internal_includes/debug.h"
#include "internal_includes/tokens.h"
#include "internal_includes/HLSLccToolkit.h"
#include "internal_includes/DataTypeAnalysis.h"
#include "internal_includes/Shader.h"
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/Instruction.h"
#include <algorithm>
// Helper function to set the vector type of 1 or more components in a vector
// If the existing values (in vector we're writing to) are all SVT_VOID, just upgrade the value and we're done
// Otherwise, set all the components in the vector that currently are set to that same value OR are now being written to
// to the "highest" type value (ordering int->uint->float)
static void SetVectorType(std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType, uint32_t regBaseIndex, uint32_t componentMask, SHADER_VARIABLE_TYPE eType, int *psMadeProgress)
{
int i = 0;
// Expand the mask to include all components that are used, also upgrade type
for (i = 0; i < 4; i++)
{
if (aeTempVecType[regBaseIndex + i] != SVT_VOID)
{
componentMask |= (1 << i);
eType = HLSLcc::SelectHigherType(eType, aeTempVecType[regBaseIndex + i]);
}
}
// Now componentMask contains the components we actually need to update and eType may have been changed to something else.
// Write the results
for (i = 0; i < 4; i++)
{
if (componentMask & (1 << i))
{
if (aeTempVecType[regBaseIndex + i] != eType)
{
aeTempVecType[regBaseIndex + i] = eType;
if (psMadeProgress)
*psMadeProgress = 1;
}
}
}
}
// Maps an operand's minimum-precision tag to the matching reduced-precision variable type.
// OPERAND_MIN_PRECISION_DEFAULT carries no precision information, so eDefault is returned for it.
// An unrecognized tag asserts and also falls back to eDefault.
static SHADER_VARIABLE_TYPE OperandPrecisionToShaderVariableType(OPERAND_MIN_PRECISION prec, SHADER_VARIABLE_TYPE eDefault)
{
    switch (prec)
    {
        case OPERAND_MIN_PRECISION_SINT_16:
            return SVT_INT16;
        case OPERAND_MIN_PRECISION_UINT_16:
            return SVT_UINT16;
        case OPERAND_MIN_PRECISION_FLOAT_2_8:
            return SVT_FLOAT10;
        case OPERAND_MIN_PRECISION_FLOAT_16:
            return SVT_FLOAT16;
        case OPERAND_MIN_PRECISION_DEFAULT:
            return eDefault;
        default:
            ASSERT(0); // Catch this to see what's going on.
            return eDefault;
    }
}
// Records eType for the components that psOperand accesses, provided the operand is a temp register.
// Operands of any other kind are ignored. A non-default minimum precision on the operand
// overrides eType before the type table is updated.
static void MarkOperandAs(Operand *psOperand, SHADER_VARIABLE_TYPE eType, std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType)
{
    if (psOperand->eType != OPERAND_TYPE_TEMP)
        return;

    // Four table entries per register
    const uint32_t firstComponent = psOperand->ui32RegisterNumber * 4;
    const uint32_t accessed = psOperand->GetAccessMask();
    const SHADER_VARIABLE_TYPE adjusted = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, eType);

    SetVectorType(aeTempVecType, firstComponent, accessed, adjusted, NULL);
}
// Applies MarkOperandAs with the same type to each of the instruction's ui32NumOperands operands, in order.
static void MarkAllOperandsAs(Instruction* psInst, SHADER_VARIABLE_TYPE eType, std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType)
{
    uint32_t operandIdx = 0;
    while (operandIdx < psInst->ui32NumOperands)
    {
        MarkOperandAs(&psInst->asOperands[operandIdx], eType, aeTempVecType);
        ++operandIdx;
    }
}
// Mark scalars from CBs: if the constant buffer operand reads exactly one component and the shader
// variable found at that location is a scalar, set the operand's component count to 1.
// Operands that are not constant buffers, or that may access more than one component, are left untouched.
// TODO: Do we need to do the same for vec2/3's as well? There may be swizzles involved which make it vec4 or something else again.
static void SetCBOperandComponents(HLSLCrossCompilerContext *psContext, Operand *psOperand)
{
    if (psOperand->eType != OPERAND_TYPE_CONSTANT_BUFFER)
        return;

    // Ignore selection modes that access more than one component
    switch (psOperand->eSelMode)
    {
        case OPERAND_4_COMPONENT_SELECT_1_MODE:
            break;
        case OPERAND_4_COMPONENT_SWIZZLE_MODE:
            // A swizzle counts as a single component only when it replicates one component
            if (!psOperand->IsSwizzleReplicated())
                return;
            break;
        case OPERAND_4_COMPONENT_MASK_MODE:
            return;
    }

    // Out-parameters of the lookups below. All start from a defined value so that a lookup
    // that does not fill them in leaves something safe to test.
    const ConstantBuffer* psCBuf = NULL;
    const ShaderVarType* psVarType = NULL;
    int32_t rebase = 0;
    bool isArray = false;

    psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf);
    ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags);

    // psVarType stays NULL when the lookup does not report a variable; there is nothing to mark then.
    if (psVarType != NULL && psVarType->Class == SVC_SCALAR)
        psOperand->iNumComponents = 1;
}
struct SetPartialDataTypes
{
SetPartialDataTypes(SHADER_VARIABLE_TYPE *_aeTempVec)
: m_TempVec(_aeTempVec)
{}
SHADER_VARIABLE_TYPE *m_TempVec;
template<typename ItrType> void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const
{
uint32_t mask = 0;
SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec;
SHADER_VARIABLE_TYPE newType;
uint32_t i, reg;
if (psOperand->eType != OPERAND_TYPE_TEMP)
return;
if (ui32OperandType == FEO_FLAG_SUBOPERAND)
{
// We really shouldn't ever be getting minprecision float indices here
ASSERT(psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_16 && psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_2_8);
mask = psOperand->GetAccessMask();
reg = psOperand->ui32RegisterNumber;
newType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_INT_AMBIGUOUS);
for (i = 0; i < 4; i++)
{
if (!(mask & (1 << i)))
continue;
if (aeTempVecType[reg * 4 + i] == SVT_VOID)
aeTempVecType[reg * 4 + i] = newType;
}
return;
}
if (psOperand->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT)
return;
mask = psOperand->GetAccessMask();
reg = psOperand->ui32RegisterNumber;
newType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_VOID);
ASSERT(newType != SVT_VOID);
for (i = 0; i < 4; i++)
{
if (!(mask & (1 << i)))
continue;
aeTempVecType[reg * 4 + i] = newType;
}
return;
}
};
// Write back the temp datatypes into operands. Also mark scalars in constant buffers
struct WritebackDataTypes
{
WritebackDataTypes(HLSLCrossCompilerContext *_ctx, SHADER_VARIABLE_TYPE *_aeTempVec)
: m_Context(_ctx)
, m_TempVec(_aeTempVec)
{}
HLSLCrossCompilerContext *m_Context;
SHADER_VARIABLE_TYPE *m_TempVec;
template<typename ItrType> void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const
{
SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec;
uint32_t reg, mask, i;
SHADER_VARIABLE_TYPE dtype;
if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER)
SetCBOperandComponents(m_Context, psOperand);
if (psOperand->eType != OPERAND_TYPE_TEMP)
return;
reg = psOperand->ui32RegisterNumber;
mask = psOperand->GetAccessMask();
dtype = SVT_VOID;
for (i = 0; i < 4; i++)
{
if (!(mask & (1 << i)))
continue;
// Check that all components have the same type
ASSERT(dtype == SVT_VOID || dtype == aeTempVecType[reg * 4 + i]);
dtype = aeTempVecType[reg * 4 + i];
ASSERT(dtype != SVT_VOID);
ASSERT(dtype == OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, dtype));
psOperand->aeDataType[i] = dtype;
}
return;
}
};
void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector<Instruction> & instructions, uint32_t ui32TempCount, std::vector<SHADER_VARIABLE_TYPE> &results)
{
uint32_t i;
Instruction *psFirstInst = &instructions[0];
Instruction *psInst = psFirstInst;
// Start with void, then move up the chain void->ambiguous int->minprec int/uint->int/uint->minprec float->float
std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType = results;
aeTempVecType.clear();
aeTempVecType.resize(ui32TempCount * 4, SVT_VOID);
if (ui32TempCount == 0)
return;
// Go through the instructions, pick up partial datatypes, because we at least know those for a fact.
// Also set all suboperands to be integers (they're always used as indices)
ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, SetPartialDataTypes(&aeTempVecType[0]));
// if (psContext->psShader->ui32MajorVersion <= 3)
{
// First pass, do analysis: deduce the data type based on opcodes, fill out aeTempVecType table
// Only ever to int->float promotion (or int->uint), never the other way around
for (i = 0; i < (uint32_t)instructions.size(); ++i, psInst++)
{
int k = 0;
if (psInst->ui32NumOperands == 0)
continue;
#ifdef _DEBUG
for (k = 0; k < (int)psInst->ui32NumOperands; k++)
{
if (psInst->asOperands[k].eType == OPERAND_TYPE_TEMP)
{
ASSERT(psInst->asOperands[k].ui32RegisterNumber < ui32TempCount);
}
}
#endif
switch (psInst->eOpcode)
{
// All float-only ops
case OPCODE_ADD:
case OPCODE_DERIV_RTX:
case OPCODE_DERIV_RTY:
case OPCODE_DIV:
case OPCODE_DP2:
case OPCODE_DP3:
case OPCODE_DP4:
case OPCODE_EXP:
case OPCODE_FRC:
case OPCODE_LOG:
case OPCODE_MAD:
case OPCODE_MIN:
case OPCODE_MAX:
case OPCODE_MUL:
case OPCODE_ROUND_NE:
case OPCODE_ROUND_NI:
case OPCODE_ROUND_PI:
case OPCODE_ROUND_Z:
case OPCODE_RSQ:
case OPCODE_SAMPLE:
case OPCODE_SAMPLE_C:
case OPCODE_SAMPLE_C_LZ:
case OPCODE_SAMPLE_L:
case OPCODE_SAMPLE_D:
case OPCODE_SAMPLE_B:
case OPCODE_SQRT:
case OPCODE_SINCOS:
case OPCODE_LOD:
case OPCODE_GATHER4:
case OPCODE_DERIV_RTX_COARSE:
case OPCODE_DERIV_RTX_FINE:
case OPCODE_DERIV_RTY_COARSE:
case OPCODE_DERIV_RTY_FINE:
case OPCODE_GATHER4_C:
case OPCODE_GATHER4_PO:
case OPCODE_GATHER4_PO_C:
case OPCODE_RCP:
MarkAllOperandsAs(psInst, SVT_FLOAT, aeTempVecType);
break;
// Comparison ops, need to enable possibility for going boolean
case OPCODE_IEQ:
case OPCODE_INE:
MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_INT_AMBIGUOUS, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_INT_AMBIGUOUS, aeTempVecType);
break;
case OPCODE_AND:
MarkOperandAs(&psInst->asOperands[0], SVT_INT_AMBIGUOUS, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType);
break;
case OPCODE_IF:
case OPCODE_BREAKC:
case OPCODE_CALLC:
case OPCODE_CONTINUEC:
case OPCODE_RETC:
MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
break;
case OPCODE_ILT:
case OPCODE_IGE:
MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
break;
case OPCODE_ULT:
case OPCODE_UGE:
MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_UINT, aeTempVecType);
break;
// Integer ops that don't care of signedness
case OPCODE_IADD:
case OPCODE_INEG:
case OPCODE_ISHL:
case OPCODE_NOT:
case OPCODE_OR:
case OPCODE_XOR:
case OPCODE_BUFINFO:
case OPCODE_COUNTBITS:
case OPCODE_FIRSTBIT_HI:
case OPCODE_FIRSTBIT_LO:
case OPCODE_FIRSTBIT_SHI:
case OPCODE_BFI:
case OPCODE_BFREV:
case OPCODE_ATOMIC_AND:
case OPCODE_ATOMIC_OR:
case OPCODE_ATOMIC_XOR:
case OPCODE_ATOMIC_CMP_STORE:
case OPCODE_ATOMIC_IADD:
case OPCODE_IMM_ATOMIC_IADD:
case OPCODE_IMM_ATOMIC_AND:
case OPCODE_IMM_ATOMIC_OR:
case OPCODE_IMM_ATOMIC_XOR:
case OPCODE_IMM_ATOMIC_EXCH:
case OPCODE_IMM_ATOMIC_CMP_EXCH:
MarkAllOperandsAs(psInst, SVT_INT_AMBIGUOUS, aeTempVecType);
break;
// Integer ops
case OPCODE_IMAD:
case OPCODE_IMAX:
case OPCODE_IMIN:
case OPCODE_IMUL:
case OPCODE_ISHR:
case OPCODE_IBFE:
case OPCODE_ATOMIC_IMAX:
case OPCODE_ATOMIC_IMIN:
case OPCODE_IMM_ATOMIC_IMAX:
case OPCODE_IMM_ATOMIC_IMIN:
MarkAllOperandsAs(psInst, SVT_INT, aeTempVecType);
break;
// uint ops
case OPCODE_UDIV:
case OPCODE_UMUL:
case OPCODE_UMAD:
case OPCODE_UMAX:
case OPCODE_UMIN:
case OPCODE_USHR:
case OPCODE_UADDC:
case OPCODE_USUBB:
case OPCODE_ATOMIC_UMAX:
case OPCODE_ATOMIC_UMIN:
case OPCODE_IMM_ATOMIC_UMAX:
case OPCODE_IMM_ATOMIC_UMIN:
case OPCODE_IMM_ATOMIC_ALLOC:
case OPCODE_IMM_ATOMIC_CONSUME:
MarkAllOperandsAs(psInst, SVT_UINT, aeTempVecType);
break;
case OPCODE_UBFE:
MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[3], SVT_UINT, aeTempVecType);
break;
// Need special handling
case OPCODE_FTOI:
case OPCODE_FTOU:
MarkOperandAs(&psInst->asOperands[0], psInst->eOpcode == OPCODE_FTOI ? SVT_INT : SVT_UINT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType);
break;
case OPCODE_GE:
case OPCODE_LT:
case OPCODE_EQ:
case OPCODE_NE:
MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_FLOAT, aeTempVecType);
break;
case OPCODE_ITOF:
case OPCODE_UTOF:
MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], psInst->eOpcode == OPCODE_ITOF ? SVT_INT : SVT_UINT, aeTempVecType);
break;
case OPCODE_LD:
case OPCODE_LD_MS:
// TODO: Would need to know the sampler return type
MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType);
break;
case OPCODE_MOVC:
MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType);
case OPCODE_SWAPC:
MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType);
break;
case OPCODE_RESINFO:
{
if (psInst->eResInfoReturnType != RESINFO_INSTRUCTION_RETURN_UINT)
MarkAllOperandsAs(psInst, SVT_FLOAT, aeTempVecType);
break;
}
case OPCODE_SAMPLE_INFO:
// TODO decode the _uint flag
MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
break;
case OPCODE_SAMPLE_POS:
MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
break;
case OPCODE_LD_UAV_TYPED:
// translates to gvec4 loadImage(gimage i, ivec p).
MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p
break;
case OPCODE_STORE_UAV_TYPED:
// translates to storeImage(gimage i, ivec p, gvec4 data)
MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p
MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); // gvec4 data
break;
case OPCODE_LD_RAW:
if (psInst->asOperands[2].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY)
MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
else
MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
break;
case OPCODE_STORE_RAW:
if (psInst->asOperands[0].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY)
MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
else
MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
break;
case OPCODE_LD_STRUCTURED:
MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
break;
case OPCODE_STORE_STRUCTURED:
MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[3], SVT_INT, aeTempVecType);
break;
case OPCODE_F32TOF16:
case OPCODE_F16TOF32:
// TODO
ASSERT(0);
break;
// No-operands, should never get here anyway
/* case OPCODE_BREAK:
case OPCODE_CALL:
case OPCODE_CASE:
case OPCODE_CONTINUE:
case OPCODE_CUT:
case OPCODE_DEFAULT:
case OPCODE_DISCARD:
case OPCODE_ELSE:
case OPCODE_EMIT:
case OPCODE_EMITTHENCUT:
case OPCODE_ENDIF:
case OPCODE_ENDLOOP:
case OPCODE_ENDSWITCH:
case OPCODE_LABEL:
case OPCODE_LOOP:
case OPCODE_CUSTOMDATA:
case OPCODE_NOP:
case OPCODE_RET:
case OPCODE_SWITCH:
case OPCODE_DCL_RESOURCE: // DCL* opcodes have
case OPCODE_DCL_CONSTANT_BUFFER: // custom operand formats.
case OPCODE_DCL_SAMPLER:
case OPCODE_DCL_INDEX_RANGE:
case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
case OPCODE_DCL_GS_INPUT_PRIMITIVE:
case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
case OPCODE_DCL_INPUT:
case OPCODE_DCL_INPUT_SGV:
case OPCODE_DCL_INPUT_SIV:
case OPCODE_DCL_INPUT_PS:
case OPCODE_DCL_INPUT_PS_SGV:
case OPCODE_DCL_INPUT_PS_SIV:
case OPCODE_DCL_OUTPUT:
case OPCODE_DCL_OUTPUT_SGV:
case OPCODE_DCL_OUTPUT_SIV:
case OPCODE_DCL_TEMPS:
case OPCODE_DCL_INDEXABLE_TEMP:
case OPCODE_DCL_GLOBAL_FLAGS:
case OPCODE_HS_DECLS: // token marks beginning of HS sub-shader
case OPCODE_HS_CONTROL_POINT_PHASE: // token marks beginning of HS sub-shader
case OPCODE_HS_FORK_PHASE: // token marks beginning of HS sub-shader
case OPCODE_HS_JOIN_PHASE: // token marks beginning of HS sub-shader
case OPCODE_EMIT_STREAM:
case OPCODE_CUT_STREAM:
case OPCODE_EMITTHENCUT_STREAM:
case OPCODE_INTERFACE_CALL:
case OPCODE_DCL_STREAM:
case OPCODE_DCL_FUNCTION_BODY:
case OPCODE_DCL_FUNCTION_TABLE:
case OPCODE_DCL_INTERFACE:
case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT:
case OPCODE_DCL_TESS_DOMAIN:
case OPCODE_DCL_TESS_PARTITIONING:
case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE:
case OPCODE_DCL_HS_MAX_TESSFACTOR:
case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
case OPCODE_DCL_THREAD_GROUP:
case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED:
case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW:
case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED:
case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW:
case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED:
case OPCODE_DCL_RESOURCE_RAW:
case OPCODE_DCL_RESOURCE_STRUCTURED:
case OPCODE_SYNC:
// TODO
case OPCODE_DADD:
case OPCODE_DMAX:
case OPCODE_DMIN:
case OPCODE_DMUL:
case OPCODE_DEQ:
case OPCODE_DGE:
case OPCODE_DLT:
case OPCODE_DNE:
case OPCODE_DMOV:
case OPCODE_DMOVC:
case OPCODE_DTOF:
case OPCODE_FTOD:
case OPCODE_EVAL_SNAPPED:
case OPCODE_EVAL_SAMPLE_INDEX:
case OPCODE_EVAL_CENTROID:
case OPCODE_DCL_GS_INSTANCE_COUNT:
case OPCODE_ABORT:
case OPCODE_DEBUG_BREAK:*/
default:
break;
}
}
}
{
int madeProgress = 0;
// Next go through MOV and MOVC and propagate the data type of whichever parameter we happen to have
do
{
madeProgress = 0;
psInst = psFirstInst;
for (i = 0; i < (uint32_t)instructions.size(); ++i, psInst++)
{
if (psInst->eOpcode == OPCODE_MOV || psInst->eOpcode == OPCODE_MOVC)
{
// Figure out the data type
uint32_t k;
SHADER_VARIABLE_TYPE dataType = SVT_VOID;
int foundImmediate = 0;
for (k = 0; k < psInst->ui32NumOperands; k++)
{
uint32_t mask, j;
if (psInst->eOpcode == OPCODE_MOVC && k == 1)
continue; // Ignore the condition operand, it's always int
if (psInst->asOperands[k].eType == OPERAND_TYPE_IMMEDIATE32)
{
foundImmediate = 1;
continue; // We don't know the data type of immediates yet, but if this is the only one found, mark as int, it'll get promoted later if needed
}
if (psInst->asOperands[k].eType != OPERAND_TYPE_TEMP)
{
dataType = psInst->asOperands[k].GetDataType(psContext);
break;
}
if (psInst->asOperands[k].eModifier != OPERAND_MODIFIER_NONE)
{
// If any modifiers are used in MOV or MOVC, that automatically is treated as float.
dataType = SVT_FLOAT;
break;
}
mask = psInst->asOperands[k].GetAccessMask();
for (j = 0; j < 4; j++)
{
if (!(mask & (1 << j)))
continue;
if (aeTempVecType[psInst->asOperands[k].ui32RegisterNumber * 4 + j] != SVT_VOID)
{
dataType = HLSLcc::SelectHigherType(dataType, aeTempVecType[psInst->asOperands[k].ui32RegisterNumber * 4 + j]);
}
}
}
if (foundImmediate && dataType == SVT_VOID)
dataType = SVT_INT;
if (dataType != SVT_VOID)
{
// Found data type, write to all operands
// First adjust it to not have precision qualifiers in it
switch (dataType)
{
case SVT_FLOAT10:
case SVT_FLOAT16:
dataType = SVT_FLOAT;
break;
case SVT_INT12:
case SVT_INT16:
dataType = SVT_INT;
break;
case SVT_UINT16:
case SVT_UINT8:
dataType = SVT_UINT;
break;
default:
break;
}
for (k = 0; k < psInst->ui32NumOperands; k++)
{
uint32_t mask;
if (psInst->eOpcode == OPCODE_MOVC && k == 1)
continue; // Ignore the condition operand, it's always int
if (psInst->asOperands[k].eType != OPERAND_TYPE_TEMP)
continue;
if (psInst->asOperands[k].eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT)
continue;
mask = psInst->asOperands[k].GetAccessMask();
SetVectorType(aeTempVecType, psInst->asOperands[k].ui32RegisterNumber * 4, mask, dataType, &madeProgress);
}
}
}
}
} while (madeProgress != 0);
}
// translate forced_int and int_ambiguous back to int
for (i = 0; i < ui32TempCount * 4; i++)
{
if (aeTempVecType[i] == SVT_FORCED_INT || aeTempVecType[i] == SVT_INT_AMBIGUOUS)
aeTempVecType[i] = SVT_INT;
}
ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, WritebackDataTypes(psContext, &aeTempVecType[0]));
// Propagate boolean data types over logical operators
bool didProgress = false;
do
{
didProgress = false;
std::for_each(instructions.begin(), instructions.end(), [&didProgress, &psContext, &aeTempVecType](Instruction &i)
{
if ((i.eOpcode == OPCODE_AND || i.eOpcode == OPCODE_OR)
&& (i.asOperands[1].GetDataType(psContext) == SVT_BOOL && i.asOperands[2].GetDataType(psContext) == SVT_BOOL)
&& (i.asOperands[0].eType == OPERAND_TYPE_TEMP && i.asOperands[0].GetDataType(psContext) != SVT_BOOL))
{
// Check if all uses see only this define
bool isStandalone = true;
std::for_each(i.m_Uses.begin(), i.m_Uses.end(), [&isStandalone](Instruction::Use &u)
{
if (u.m_Op->m_Defines.size() > 1)
isStandalone = false;
});
if (isStandalone)
{
didProgress = true;
// Change data type of this and all uses
i.asOperands[0].aeDataType[0] = i.asOperands[0].aeDataType[1] = i.asOperands[0].aeDataType[2] = i.asOperands[0].aeDataType[3] = SVT_BOOL;
uint32_t reg = i.asOperands[0].ui32RegisterNumber;
aeTempVecType[reg * 4 + 0] = aeTempVecType[reg * 4 + 1] = aeTempVecType[reg * 4 + 2] = aeTempVecType[reg * 4 + 3] = SVT_BOOL;
std::for_each(i.m_Uses.begin(), i.m_Uses.end(), [](Instruction::Use &u)
{
u.m_Op->aeDataType[0] = u.m_Op->aeDataType[1] = u.m_Op->aeDataType[2] = u.m_Op->aeDataType[3] = SVT_BOOL;
});
}
}
});
} while (didProgress);
}

2
src/Declaration.cpp Normal file
View File

@ -0,0 +1,2 @@
#include "internal_includes/Declaration.h"

View File

@ -0,0 +1,253 @@
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/HLSLccToolkit.h"
#include "internal_includes/Shader.h"
#include "internal_includes/DataTypeAnalysis.h"
#include "internal_includes/UseDefineChains.h"
#include "internal_includes/Declaration.h"
#include "internal_includes/debug.h"
#include "internal_includes/Translator.h"
#include "internal_includes/ControlFlowGraph.h"
#include <sstream>
// Runs temp-register analysis for one shader phase:
//  1. finds the phase's DCL_TEMPS declaration and records the temp count,
//  2. builds use-define chains and splits temps based on them,
//  3. runs the data type analysis over the (possibly larger) temp set,
//  4. writes the new temp count back into the declaration if it changed.
// Returns early (leaving the counts at zero) when the phase declares no temps.
void HLSLCrossCompilerContext::DoDataTypeAnalysis(ShaderPhase *psPhase)
{
    const size_t declCount = psPhase->psDecl.size();

    psPhase->psTempDeclaration = NULL;
    psPhase->ui32OrigTemps = 0;
    psPhase->ui32TotalTemps = 0;

    // Pick up the temp count from the first DCL_TEMPS declaration, if any
    for (size_t declIdx = 0; declIdx < declCount; ++declIdx)
    {
        if (psPhase->psDecl[declIdx].eOpcode != OPCODE_DCL_TEMPS)
            continue;

        psPhase->ui32TotalTemps = psPhase->psDecl[declIdx].value.ui32NumTemps;
        psPhase->psTempDeclaration = &psPhase->psDecl[declIdx];
        break;
    }

    // Nothing to analyze without temps
    if (psPhase->ui32TotalTemps == 0)
        return;

    psPhase->ui32OrigTemps = psPhase->ui32TotalTemps;

    // The split table holds, per register, the index of the original register it was split out from, or 0xffffffff.
    // Format: lowest 16 bits: original register. bits 16-23: rebase (eg value of 1 means .yzw was changed to .xyz): bits 24-31: component count
    psPhase->pui32SplitInfo.clear();
    psPhase->pui32SplitInfo.resize(psPhase->ui32TotalTemps * 2, 0xffffffff);

    // The chains only live for the duration of the splitting step.
    {
        DefineUseChains defUse;
        UseDefineChains useDef;

        BuildUseDefineChains(psPhase->psInst, psPhase->ui32TotalTemps, defUse, useDef, psPhase->GetCFG());
        CalculateStandaloneDefinitions(defUse, psPhase->ui32TotalTemps);

        // Sampler precision downgrade is only done for pixel shaders.
        if (psShader->eShaderType == PIXEL_SHADER)
        {
            UpdateSamplerPrecisions(psShader->sInfo, defUse, psPhase->ui32TotalTemps);
        }

        UDSplitTemps(&psPhase->ui32TotalTemps, defUse, useDef, psPhase->pui32SplitInfo);
        WriteBackUsesAndDefines(defUse);
    }

    HLSLcc::DataTypeAnalysis::SetDataTypes(this, psPhase->psInst, psPhase->ui32TotalTemps, psPhase->peTempTypes);

    // Splitting may have changed the temp count; keep the declaration in sync
    const bool tempCountChanged = psPhase->ui32OrigTemps != psPhase->ui32TotalTemps;
    if (psPhase->psTempDeclaration && tempCountChanged)
    {
        psPhase->psTempDeclaration->value.ui32NumTemps = psPhase->ui32TotalTemps;
    }
}
// Resets the parts of the cross-stage dependency data that belong to the
// shader type currently being translated:
//  - pixel shaders clear the cross dependency data,
//  - hull shaders reset the tessellator partitioning and output primitive.
// Other shader types leave the dependency data untouched.
void HLSLCrossCompilerContext::ClearDependencyData()
{
    switch (psShader->eShaderType)
    {
        case PIXEL_SHADER:
        {
            psDependencies->ClearCrossDependencyData();
            // Without this break the pixel shader case fell through and also
            // wiped the tessellation state handled by the hull shader case.
            break;
        }
        case HULL_SHADER:
        {
            psDependencies->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED;
            psDependencies->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED;
            break;
        }
        default:
            break;
    }
}
void HLSLCrossCompilerContext::AddIndentation()
{
int i;
bstring glsl = *currentGLSLString;
for (i = 0; i < indent; ++i)
{
bcatcstr(glsl, " ");
}
}
// Builds the name under which the input register referenced by psOperand is declared.
//   psOperand        - operand referring to an input (register space 0) or patch constant (register space 1)
//   piRebase         - optional out: rebase of the matching signature entry, or 0 when the input is redirected
//   iIgnoreRedirect  - when non-zero, the per-phase redirect tables are not consulted
//   puiIgnoreSwizzle - forwarded to the translator's TranslateSystemValue()
// Resolution order: per-phase redirect variable, then a system value name from
// the translator, then "<inputPrefix>[patch]<semantic name><semantic index>".
std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const
{
    const int regSpace = psOperand->GetRegisterSpace(this);
    const auto regNum = psOperand->ui32RegisterNumber;

    if (iIgnoreRedirect == 0)
    {
        // 0xfe in the redirect table marks a register that is routed through a phase-local variable
        bool redirected = false;
        if (regSpace == 0)
            redirected = psShader->asPhases[currentPhase].acInputNeedsRedirect[regNum] == 0xfe;
        else if (regSpace == 1)
            redirected = psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[regNum] == 0xfe;

        if (redirected)
        {
            std::ostringstream redirectName;
            redirectName << "phase" << currentPhase << "_Input" << regSpace << "_" << regNum;
            if (piRebase)
                *piRebase = 0;
            return redirectName.str();
        }
    }

    // Look up the signature entry backing this register
    const ShaderInfo::InOutSignature* psIn = NULL;
    if (regSpace != 0)
    {
        psShader->sInfo.GetPatchConstantSignatureFromRegister(regNum, psOperand->GetAccessMask(), &psIn, true);
    }
    else
    {
        psShader->sInfo.GetInputSignatureFromRegister(regNum, psOperand->GetAccessMask(), &psIn, true);
    }

    if (psIn && piRebase)
        *piRebase = psIn->iRebase;

    // Give the translator a chance to map this to a system value
    std::string sysValueName = "";
    bool skipPrefix = false;
    const bool isSystemValue = psTranslator->TranslateSystemValue(psOperand, psIn, sysValueName, puiIgnoreSwizzle, psShader->aIndexedInput[regSpace][regNum] != 0, true, &skipPrefix);
    if (isSystemValue)
    {
        const bool needsPrefix = psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0) && !skipPrefix;
        if (needsPrefix)
            return inputPrefix + sysValueName;
        return sysValueName;
    }

    // Plain input: name it after its semantic
    ASSERT(psIn != NULL);
    std::ostringstream declaredName;
    declaredName << inputPrefix << (regSpace == 1 ? "patch" : "") << psIn->semanticName << psIn->ui32SemanticIndex;
    return declaredName.str();
}
std::string HLSLCrossCompilerContext::GetDeclaredOutputName(const Operand* psOperand,
int* piStream,
uint32_t *puiIgnoreSwizzle,
int *piRebase,
int iIgnoreRedirect) const
{
std::ostringstream oss;
const ShaderInfo::InOutSignature* psOut = NULL;
int regSpace = psOperand->GetRegisterSpace(this);
if (iIgnoreRedirect == 0)
{
if ((regSpace == 0 && psShader->asPhases[currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)
|| (regSpace == 1 && psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe))
{
oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOperand->ui32RegisterNumber;
if (piRebase)
*piRebase = 0;
return oss.str();
}
}
if (regSpace == 0)
psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), psShader->ui32CurrentVertexOutputStream, &psOut, true);
else
psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psOut, true);
if (psOut && piRebase)
*piRebase = psOut->iRebase;
if (psOut && (psOut->isIndexed.find(currentPhase) != psOut->isIndexed.end()))
{
// Need to route through temp output variable
oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOut->indexStart.find(currentPhase)->second;
if (!psOperand->m_SubOperands[0].get())
{
oss << "[" << psOperand->ui32RegisterNumber << "]";
}
if (piRebase)
*piRebase = 0;
return oss.str();
}
std::string res = "";
if (psTranslator->TranslateSystemValue(psOperand, psOut, res, puiIgnoreSwizzle, psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber], false))
{
if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0))
return outputPrefix + res;
else
return res;
}
ASSERT(psOut != NULL);
oss << outputPrefix << (regSpace == 1 ? "patch" : "") << psOut->semanticName << psOut->ui32SemanticIndex;
return oss.str();
}
// Tells whether the output referenced by psOperand still has to be declared,
// and records it as declared when it does.
//   psOperand - the output operand being written
//   count     - number of consecutive registers to mark as declared
// Returns true when a declaration is needed. For pixel shaders the depth
// outputs are decided by operand type alone and are not recorded.
bool HLSLCrossCompilerContext::OutputNeedsDeclaring(const Operand* psOperand, const int count)
{
    char pendingMask = (char)psOperand->ui32CompMask;
    const int regSpace = psOperand->GetRegisterSpace(this);
    // Each vertex output stream gets its own window of 1024 slots
    const uint32_t firstSlot = psOperand->ui32RegisterNumber + (psShader->ui32CurrentVertexOutputStream * 1024); // Assume less than 1K input streams
    ASSERT(psShader->ui32CurrentVertexOutputStream < 4);

    // First check for various builtins, mostly depth-output ones.
    if (psShader->eShaderType == PIXEL_SHADER)
    {
        switch (psOperand->eType)
        {
            case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL:
            case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL:
                return true;
            case OPERAND_TYPE_OUTPUT_DEPTH:
                // GL doesn't need declaration, Metal does.
                return psShader->eTargetLanguage == LANG_METAL;
            default:
                break;
        }
    }

    // Nothing to do if every requested component has already been declared
    if ((pendingMask & ~psShader->acOutputDeclared[regSpace][firstSlot]) == 0)
        return false;

    if (psOperand->eSpecialName == NAME_UNDEFINED)
    {
        // Need to fetch the actual comp mask from the signature
        const ShaderInfo::InOutSignature* psSignature = NULL;
        if (regSpace != 0)
        {
            psShader->sInfo.GetPatchConstantSignatureFromRegister(
                psOperand->ui32RegisterNumber,
                psOperand->ui32CompMask,
                &psSignature);
        }
        else
        {
            psShader->sInfo.GetOutputSignatureFromRegister(
                psOperand->ui32RegisterNumber,
                psOperand->ui32CompMask,
                psShader->ui32CurrentVertexOutputStream,
                &psSignature);
        }
        pendingMask = (char)psSignature->ui32Mask;
    }

    // Remember the components as declared for every register in the range
    for (int slotOffset = 0; slotOffset < count; ++slotOffset)
    {
        psShader->acOutputDeclared[regSpace][firstSlot + slotOffset] |= pendingMask;
    }
    return true;
}

212
src/HLSLcc.cpp Normal file
View File

@ -0,0 +1,212 @@
#include "hlslcc.h"
#include <memory>
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/toGLSL.h"
#include "internal_includes/toMetal.h"
#include "internal_includes/Shader.h"
#include "internal_includes/decode.h"
#ifndef GL_VERTEX_SHADER_ARB
#define GL_VERTEX_SHADER_ARB 0x8B31
#endif
#ifndef GL_FRAGMENT_SHADER_ARB
#define GL_FRAGMENT_SHADER_ARB 0x8B30
#endif
#ifndef GL_GEOMETRY_SHADER
#define GL_GEOMETRY_SHADER 0x8DD9
#endif
#ifndef GL_TESS_EVALUATION_SHADER
#define GL_TESS_EVALUATION_SHADER 0x8E87
#endif
#ifndef GL_TESS_CONTROL_SHADER
#define GL_TESS_CONTROL_SHADER 0x8E88
#endif
#ifndef GL_COMPUTE_SHADER
#define GL_COMPUTE_SHADER 0x91B9
#endif
// Main entry point: translates DirectX shader bytecode held in memory.
//   shader              - pointer to the DX bytecode
//   flags               - HLSLCC_FLAG_* translation flags
//   language            - requested target language; for non-Metal targets the value
//                         returned by ToGLSL::SetLanguage() is what gets reported back
//   extensions          - GL extension info handed to the GLSL translator
//   dependencies        - cross-stage dependency data stored on the translation context
//   samplerPrecisions   - sampler precisions added to the shader info before translating
//   reflectionCallbacks - reflection interface given to the translation context
//   result              - receives source code, reflection data, shader type and language
// Returns 1 on success and 0 on failure. When translation itself fails the function
// returns early and result->shaderType / result->GLSLLanguage are not written.
HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader,
    unsigned int flags,
    GLLang language,
    const GlExtensions *extensions,
    GLSLCrossDependencyData* dependencies,
    HLSLccSamplerPrecisionInfo& samplerPrecisions,
    HLSLccReflection& reflectionCallbacks,
    GLSLShader* result)
{
    int GLSLShaderType = GL_FRAGMENT_SHADER_ARB;
    int success = 0;

    uint32_t* tokens = (uint32_t*)shader;

    // unique_ptr replaces the deprecated std::auto_ptr (removed in C++17)
    std::unique_ptr<Shader> psShader(DecodeDXBC(tokens, flags));

    if (psShader.get())
    {
        HLSLCrossCompilerContext sContext(reflectionCallbacks);

        // Add shader precisions from the list
        psShader->sInfo.AddSamplerPrecisions(samplerPrecisions);

        if (psShader->ui32MajorVersion <= 3)
        {
            flags &= ~HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS;
        }

        sContext.psShader = psShader.get();
        sContext.flags = flags;
        sContext.psDependencies = dependencies;

        for (size_t phase = 0; phase < psShader->asPhases.size(); ++phase)
        {
            psShader->asPhases[phase].hasPostShaderCode = 0;
        }

        // Frees the output string and the per-phase helper strings; shared by
        // the success path and every failure path that runs after translation.
        auto releaseTranslationBuffers = [&sContext, &psShader]()
        {
            bdestroy(sContext.glsl);
            for (size_t phase = 0; phase < psShader->asPhases.size(); ++phase)
            {
                bdestroy(psShader->asPhases[phase].postShaderCode);
                bdestroy(psShader->asPhases[phase].earlyMain);
            }
        };

        if (language == LANG_METAL)
        {
            // Tessellation or geometry shaders are not supported
            if (psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER || psShader->eShaderType == GEOMETRY_SHADER)
            {
                result->sourceCode = "";
                return 0;
            }
            ToMetal translator(&sContext);
            if (!translator.Translate())
            {
                releaseTranslationBuffers();
                return 0;
            }
        }
        else
        {
            ToGLSL translator(&sContext);
            language = translator.SetLanguage(language);
            translator.SetExtensions(extensions);
            if (!translator.Translate())
            {
                releaseTranslationBuffers();
                return 0;
            }
        }

        switch (psShader->eShaderType)
        {
            case VERTEX_SHADER:
                GLSLShaderType = GL_VERTEX_SHADER_ARB;
                break;
            case GEOMETRY_SHADER:
                GLSLShaderType = GL_GEOMETRY_SHADER;
                break;
            case DOMAIN_SHADER:
                GLSLShaderType = GL_TESS_EVALUATION_SHADER;
                break;
            case HULL_SHADER:
                GLSLShaderType = GL_TESS_CONTROL_SHADER;
                break;
            case COMPUTE_SHADER:
                GLSLShaderType = GL_COMPUTE_SHADER;
                break;
            default:
                // Anything else keeps the fragment shader default
                break;
        }

        char* glslcstr = bstr2cstr(sContext.glsl, '\0');
        if (glslcstr == NULL)
        {
            // bstr2cstr reports failure with NULL; assigning that to the
            // result string would be undefined behaviour, so fail cleanly.
            releaseTranslationBuffers();
            return 0;
        }
        result->sourceCode = glslcstr;
        bcstrfree(glslcstr);

        releaseTranslationBuffers();

        result->reflection = psShader->sInfo;
        result->textureSamplers = psShader->textureSamplers;

        success = 1;
    }

    /* Fill in the result struct */
    result->shaderType = GLSLShaderType;
    result->GLSLLanguage = language;

    return success;
}
// Reads a file containing DirectX shader bytecode into memory and forwards it
// to TranslateHLSLFromMem() with the remaining arguments unchanged.
//   filename - path of the bytecode file, opened in binary mode
// Returns the result of TranslateHLSLFromMem(), or 0 when the file cannot be
// opened or its size cannot be determined.
HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename,
    unsigned int flags,
    GLLang language,
    const GlExtensions *extensions,
    GLSLCrossDependencyData* dependencies,
    HLSLccSamplerPrecisionInfo& samplerPrecisions,
    HLSLccReflection& reflectionCallbacks,
    GLSLShader* result)
{
    FILE* shaderFile = fopen(filename, "rb");
    if (!shaderFile)
    {
        return 0;
    }

    // Determine the file size; bail out if seeking or telling fails
    long length = -1;
    if (fseek(shaderFile, 0, SEEK_END) == 0)
    {
        length = ftell(shaderFile);
    }
    if (length < 0 || fseek(shaderFile, 0, SEEK_SET) != 0)
    {
        fclose(shaderFile);
        return 0;
    }

    // The buffer must actually contain length + 1 elements: reserve() only
    // allocates capacity, and indexing an empty vector is undefined behaviour.
    std::vector<char> shader(static_cast<size_t>(length) + 1, '\0');

    const size_t readLength = fread(&shader[0], 1, static_cast<size_t>(length), shaderFile);
    fclose(shaderFile);

    // Terminate right after the bytes that were actually read
    shader[readLength] = '\0';

    return TranslateHLSLFromMem(&shader[0], flags, language, extensions, dependencies, samplerPrecisions, reflectionCallbacks, result);
}

482
src/HLSLccToolkit.cpp Normal file
View File

@ -0,0 +1,482 @@
#include "internal_includes/HLSLccToolkit.h"
#include "internal_includes/debug.h"
#include "internal_includes/toGLSLOperand.h"
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/Shader.h"
#include <sstream>
#include <cmath>
namespace HLSLcc
{
// Returns the number of bits set in a.
// The previous multiply/modulo bit-hack was only valid for values of up to
// 14 bits; this version is correct for the whole 32-bit range and gives the
// same results for every input the old one handled.
uint32_t GetNumberBitsSet(uint32_t a)
{
    uint32_t bitCount = 0;
    while (a != 0)
    {
        // Clears the lowest set bit, so the loop runs once per set bit
        a &= a - 1;
        ++bitCount;
    }
    return bitCount;
}
// Maps a shader variable type to the matching TO_FLAG_* value.
// Types without a dedicated flag map to TO_FLAG_NONE.
uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType)
{
    switch (eType)
    {
        case SVT_FLOAT16:
            return TO_FLAG_FORCE_HALF;

        case SVT_UINT:
        case SVT_UINT16:
            return TO_FLAG_UNSIGNED_INTEGER;

        case SVT_INT:
        case SVT_INT16:
        case SVT_INT12:
            return TO_FLAG_INTEGER;

        case SVT_BOOL:
            return TO_FLAG_BOOL;

        default:
            return TO_FLAG_NONE;
    }
}
// Maps a set of TO_* type flags back to a shader variable type.
// Flags are tested in a fixed priority order (half, int, uint, bool) and the
// first match wins; with none of them set the result is SVT_FLOAT.
SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags)
{
    SHADER_VARIABLE_TYPE svtType = SVT_FLOAT;

    if (typeflags & TO_FLAG_FORCE_HALF)
    {
        svtType = SVT_FLOAT16;
    }
    else if (typeflags & (TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT))
    {
        svtType = SVT_INT;
    }
    else if (typeflags & (TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT))
    {
        svtType = SVT_UINT;
    }
    else if (typeflags & TO_FLAG_BOOL)
    {
        svtType = SVT_BOOL;
    }

    return svtType;
}
// Returns the GLSL type/constructor name for a base type and component count.
//   eType            - base type of the value
//   components       - number of components, expected to be 1..4
//   useGLSLPrecision - when false, reduced-precision types use the plain names
//                      without a mediump/lowp qualifier
// Asserts and returns " " for base types that have no GLSL name here.
const char * GetConstructorForTypeGLSL(const SHADER_VARIABLE_TYPE eType,
    const int components, bool useGLSLPrecision)
{
    // Each table is indexed by component count; slot 0 is a placeholder.
    static const char * const kUint[] = { " ", "uint", "uvec2", "uvec3", "uvec4" };
    static const char * const kUintMedium[] = { " ", "mediump uint", "mediump uvec2", "mediump uvec3", "mediump uvec4" };
    static const char * const kInt[] = { " ", "int", "ivec2", "ivec3", "ivec4" };
    static const char * const kIntMedium[] = { " ", "mediump int", "mediump ivec2", "mediump ivec3", "mediump ivec4" };
    static const char * const kIntLow[] = { " ", "lowp int", "lowp ivec2", "lowp ivec3", "lowp ivec4" };
    static const char * const kFloat[] = { " ", "float", "vec2", "vec3", "vec4" };
    static const char * const kFloatMedium[] = { " ", "mediump float", "mediump vec2", "mediump vec3", "mediump vec4" };
    static const char * const kFloatLow[] = { " ", "lowp float", "lowp vec2", "lowp vec3", "lowp vec4" };
    static const char * const kBool[] = { " ", "bool", "bvec2", "bvec3", "bvec4" };

    ASSERT(components >= 1 && components <= 4);

    // Select the name table first, then index it once at the end
    const char * const *names = nullptr;
    switch (eType)
    {
        case SVT_UINT:
            names = kUint;
            break;
        case SVT_UINT16:
            names = useGLSLPrecision ? kUintMedium : kUint;
            break;
        case SVT_INT:
            names = kInt;
            break;
        case SVT_INT16:
            names = useGLSLPrecision ? kIntMedium : kInt;
            break;
        case SVT_INT12:
            names = useGLSLPrecision ? kIntLow : kInt;
            break;
        case SVT_FLOAT:
            names = kFloat;
            break;
        case SVT_FLOAT16:
            names = useGLSLPrecision ? kFloatMedium : kFloat;
            break;
        case SVT_FLOAT10:
            names = useGLSLPrecision ? kFloatLow : kFloat;
            break;
        case SVT_BOOL:
            names = kBool;
            break;
        default:
            ASSERT(0);
            return " ";
    }
    return names[components];
}
// Returns the Metal type/constructor name for a base type and component count.
//   eType      - base type of the value
//   components - number of components, expected to be 1..4
// Both reduced-precision int types map to short, both reduced-precision float
// types map to half. Asserts and returns " " for base types with no name here.
const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType,
    const int components)
{
    // Each table is indexed by component count; slot 0 is a placeholder.
    static const char * const kUint[] = { " ", "uint", "uint2", "uint3", "uint4" };
    static const char * const kUshort[] = { " ", "ushort", "ushort2", "ushort3", "ushort4" };
    static const char * const kInt[] = { " ", "int", "int2", "int3", "int4" };
    static const char * const kShort[] = { " ", "short", "short2", "short3", "short4" };
    static const char * const kFloat[] = { " ", "float", "float2", "float3", "float4" };
    static const char * const kHalf[] = { " ", "half", "half2", "half3", "half4" };
    static const char * const kBool[] = { " ", "bool", "bool2", "bool3", "bool4" };

    ASSERT(components >= 1 && components <= 4);

    // Select the name table first, then index it once at the end
    const char * const *names = nullptr;
    switch (eType)
    {
        case SVT_UINT:
            names = kUint;
            break;
        case SVT_UINT16:
            names = kUshort;
            break;
        case SVT_INT:
            names = kInt;
            break;
        case SVT_INT16:
        case SVT_INT12:
            names = kShort;
            break;
        case SVT_FLOAT:
            names = kFloat;
            break;
        case SVT_FLOAT16:
        case SVT_FLOAT10:
            names = kHalf;
            break;
        case SVT_BOOL:
            names = kBool;
            break;
        default:
            ASSERT(0);
            return " ";
    }
    return names[components];
}
// Returns the type/constructor name for the context's target language by
// dispatching to the Metal or GLSL variant. useGLSLPrecision only affects
// the GLSL path.
const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision /* = true*/)
{
    const bool targetsMetal = psContext->psShader->eTargetLanguage == LANG_METAL;
    if (!targetsMetal)
    {
        return GetConstructorForTypeGLSL(eType, components, useGLSLPrecision);
    }
    return GetConstructorForTypeMetal(eType, components);
}
// Builds the matrix type name "<prefix><columns>x<rows>" for the context's
// target language. Metal uses float/half prefixes; other targets use "mat"
// with an optional mediump/lowp qualifier.
// Asserts and returns an empty string for non-float base types.
std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows)
{
    const bool targetsMetal = psContext->psShader->eTargetLanguage == LANG_METAL;

    // Work out the text that precedes the dimensions
    const char *prefix = nullptr;
    switch (eBaseType)
    {
        case SVT_FLOAT:
            prefix = targetsMetal ? "float" : "mat";
            break;
        case SVT_FLOAT16:
            prefix = targetsMetal ? "half" : "mediump mat";
            break;
        case SVT_FLOAT10:
            prefix = targetsMetal ? "half" : "lowp mat";
            break;
        default:
            ASSERT(0);
            break;
    }

    // Unsupported base type: nothing was written, so the name stays empty
    if (prefix == nullptr)
    {
        return std::string();
    }

    std::ostringstream typeName;
    typeName << prefix << columns << "x" << rows;
    return typeName.str();
}
// Appends a swizzle selecting the first `count` components to dest:
// 1 -> ".x", 2 -> ".xy", 3 -> ".xyz". Nothing is appended for 0, and nothing
// for 4 because a full vector needs no swizzle. Counts above 4 append ".xyzw".
void AddSwizzleUsingElementCount(bstring dest, uint32_t count)
{
    static const char * const kComponentNames[4] = { "x", "y", "z", "w" };

    if (count == 4 || count == 0)
    {
        return;
    }

    bcatcstr(dest, ".");
    for (uint32_t comp = 0; comp < 4 && comp < count; ++comp)
    {
        bcatcstr(dest, kComponentNames[comp]);
    }
}
// Returns how many components a write mask covers, i.e. the number of bits
// set in it. In HLSL bytecode an empty write mask also means "everything",
// so 0 yields 4.
int WriteMaskToComponentCount(uint32_t writeMask)
{
    return (writeMask != 0) ? (int)GetNumberBitsSet(writeMask) : 4;
}
// Builds a component mask with the lowest `count` bits set:
// 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15.
uint32_t BuildComponentMaskFromElementCount(int count)
{
    // One bit above the wanted range; subtracting one sets every bit below it
    const int bitAboveRange = 1 << count;
    return bitAboveRange - 1;
}
// Returns true if a value of type src can be assigned directly to dest
// (mostly for mediump<->highp floats etc). That is the case when both types
// are equal, or when they are the same kind of type in different precisions:
// float/float10/float16, int/int12/int16, or uint/uint16.
bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src)
{
    if (src == dest)
        return true;

    // Classifies a type by family; 0 means it belongs to none of them
    auto familyOf = [](SHADER_VARIABLE_TYPE type) -> int
    {
        switch (type)
        {
            case SVT_FLOAT:
            case SVT_FLOAT10:
            case SVT_FLOAT16:
                return 1;
            case SVT_INT:
            case SVT_INT12:
            case SVT_INT16:
                return 2;
            case SVT_UINT:
            case SVT_UINT16:
                return 3;
            default:
                return 0;
        }
    };

    const int destFamily = familyOf(dest);
    return destFamily != 0 && destFamily == familyOf(src);
}
// Maps a resource return type to the matching TO_FLAG_* value: signed and
// unsigned integer returns get their integer flag, anything else gets
// TO_FLAG_NONE.
uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType)
{
    switch (eType)
    {
        case RETURN_TYPE_SINT:
            return TO_FLAG_INTEGER;
        case RETURN_TYPE_UINT:
            return TO_FLAG_UNSIGNED_INTEGER;
        default:
            return TO_FLAG_NONE;
    }
}
// Combines a resource return type with a precision into a shader variable type.
// Signed and unsigned integer return types pick from the int and uint
// families; every other return type picks from the float family. Precisions
// other than lowp/mediump select the full-precision member.
SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec)
{
    // Column order in every table: full precision, lowp, mediump
    static const SHADER_VARIABLE_TYPE kSignedTypes[3] = { SVT_INT, SVT_INT12, SVT_INT16 };
    static const SHADER_VARIABLE_TYPE kUnsignedTypes[3] = { SVT_UINT, SVT_UINT8, SVT_UINT16 };
    static const SHADER_VARIABLE_TYPE kFloatTypes[3] = { SVT_FLOAT, SVT_FLOAT10, SVT_FLOAT16 };

    int precisionColumn = 0;
    if (ePrec == REFLECT_RESOURCE_PRECISION_LOWP)
        precisionColumn = 1;
    else if (ePrec == REFLECT_RESOURCE_PRECISION_MEDIUMP)
        precisionColumn = 2;

    if (eType == RETURN_TYPE_SINT)
        return kSignedTypes[precisionColumn];
    if (eType == RETURN_TYPE_UINT)
        return kUnsignedTypes[precisionColumn];
    return kFloatTypes[precisionColumn];
}
// Returns the auto-expand flag for a vector of elemCount elements, obtained
// by shifting the vec2 flag left once per element above two.
// NOTE(review): presumably only meaningful for elemCount >= 2 - confirm with callers.
uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount)
{
    const uint32_t stepsAboveVec2 = elemCount - 2;
    return TO_AUTO_EXPAND_TO_VEC2 << stepsAboveVec2;
}
// Returns true if the operation is commutative
bool IsOperationCommutative(int eOpCode)
{
switch ((OPCODE_TYPE)eOpCode)
{
case OPCODE_DADD:
case OPCODE_IADD:
case OPCODE_ADD:
case OPCODE_MUL:
case OPCODE_IMUL:
case OPCODE_OR:
case OPCODE_AND:
return true;
default:
return false;
};
}
// Returns true if operands are identical, only cares about temp registers currently.
// Two operands count as identical when both are non-null temp operands with the
// same modifier, component count, register number and selection mode, and the
// data belonging to that selection mode matches as well: the component mask in
// mask mode, the first swizzle entry in select-1 mode, all four swizzle entries
// in swizzle mode.
bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB)
{
    if (!psA || !psB)
        return false;

    if (psA->eType != OPERAND_TYPE_TEMP || psB->eType != OPERAND_TYPE_TEMP)
        return false;

    if (psA->eModifier != psB->eModifier)
        return false;

    if (psA->iNumComponents != psB->iNumComponents)
        return false;

    if (psA->ui32RegisterNumber != psB->ui32RegisterNumber)
        return false;

    if (psA->eSelMode != psB->eSelMode)
        return false;

    if (psA->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && psA->ui32CompMask != psB->ui32CompMask)
        return false;

    if (psA->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE && psA->aui32Swizzle[0] != psB->aui32Swizzle[0])
        return false;

    // The operands differ when the swizzles are NOT equal. The previous code
    // had this test inverted and rejected operands whose swizzles matched.
    if (psA->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE && !std::equal(&psA->aui32Swizzle[0], &psA->aui32Swizzle[4], &psB->aui32Swizzle[0]))
        return false;

    return true;
}
bool IsAddOneInstruction(const Instruction *psInst)
{
if (psInst->eOpcode != OPCODE_IADD)
return false;
if (psInst->asOperands[0].eType != OPERAND_TYPE_TEMP)
return false;
if (psInst->asOperands[1].eType == OPERAND_TYPE_TEMP)
{
if (psInst->asOperands[1].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber)
return false;
if (psInst->asOperands[2].eType != OPERAND_TYPE_IMMEDIATE32)
return false;
if (*(int *)&psInst->asOperands[2].afImmediates[0] != 1)
return false;
}
else
{
if (psInst->asOperands[1].eType != OPERAND_TYPE_IMMEDIATE32)
return false;
if (psInst->asOperands[2].eType != OPERAND_TYPE_TEMP)
return false;
if (psInst->asOperands[2].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber)
return false;
if (*(int *)&psInst->asOperands[1].afImmediates[0] != 1)
return false;
}
return true;
}
// Returns the dimension count associated with a texture resource dimension:
// 1 for 1D textures, 2 for 2D / 1D-array / cube textures and 3 for 3D / 2D-array /
// cube-array textures. Any other value trips an assert and yields 0.
int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim)
{
    int numDimensions = 0;

    switch ((RESOURCE_DIMENSION)eResDim)
    {
        case RESOURCE_DIMENSION_TEXTURE1D:
            numDimensions = 1;
            break;

        case RESOURCE_DIMENSION_TEXTURE2D:
        case RESOURCE_DIMENSION_TEXTURE1DARRAY:
        case RESOURCE_DIMENSION_TEXTURECUBE:
            numDimensions = 2;
            break;

        case RESOURCE_DIMENSION_TEXTURE3D:
        case RESOURCE_DIMENSION_TEXTURE2DARRAY:
        case RESOURCE_DIMENSION_TEXTURECUBEARRAY:
            numDimensions = 3;
            break;

        default:
            // Not a texture dimension this function knows about.
            ASSERT(0);
            break;
    }

    return numDimensions;
}
// Returns the "more important" of the two types a and b.
// The types in the priority table below are checked from first to last, and the first
// one that matches either argument wins. Reduced-precision types come first, then
// forced int, then float, uint, int and ambiguous int. If neither argument is in the
// table, the larger enum value is returned.
SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b)
{
    // Highest priority first.
    static const SHADER_VARIABLE_TYPE kPriorityOrder[] =
    {
        SVT_FLOAT16,
        SVT_FLOAT10,
        SVT_UINT16,
        SVT_UINT8,
        SVT_INT16,
        SVT_INT12,
        SVT_FORCED_INT,
        SVT_FLOAT,
        SVT_UINT,
        SVT_INT,
        SVT_INT_AMBIGUOUS,
    };

    for (const SHADER_VARIABLE_TYPE candidate : kPriorityOrder)
    {
        if (a == candidate || b == candidate)
            return candidate;
    }

    // Neither type is in the table: fall back to the enum ordering.
    return a > b ? a : b;
}
// Returns true if a direct constructor can convert a value of type src into type dest.
// That is the case when both types belong to the same family:
//   - integer-like: INT, UINT, BOOL, INT12, INT16, UINT16
//   - floating:     FLOAT, DOUBLE, FLOAT16, FLOAT10
bool CanDoDirectCast(SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest)
{
    const auto isIntegerLike = [](SHADER_VARIABLE_TYPE t)
    {
        return t == SVT_INT || t == SVT_UINT || t == SVT_BOOL || t == SVT_INT12 || t == SVT_INT16 || t == SVT_UINT16;
    };
    const auto isFloating = [](SHADER_VARIABLE_TYPE t)
    {
        return t == SVT_FLOAT || t == SVT_DOUBLE || t == SVT_FLOAT16 || t == SVT_FLOAT10;
    };

    // uint<->int<->bool conversions are possible
    if (isIntegerLike(src) && isIntegerLike(dest))
        return true;

    // float<->double conversions are possible
    return isFloating(src) && isFloating(dest);
}
// fpcheck(x) is true when x is NaN or infinite.
#ifdef _MSC_VER
#define fpcheck(x) (_isnan(x) || !_finite(x))
#else
#define fpcheck(x) (std::isnan(x) || std::isinf(x))
#endif
// Appends f to b, formatted with "%.9g". When the formatted text has neither an
// exponent nor a decimal point and f is a finite number, ".0" is appended as well so
// the output still reads as a floating point literal.
void PrintFloat(bstring b, float f)
{
    bstring formatted = bformat("%.9g", f);

    // Inspect the text before handing it over and releasing it.
    const bool hasExponent = bstrchrp(formatted, 'e', 0) >= 0;
    const bool hasDecimalPoint = bstrchrp(formatted, '.', 0) >= 0;

    bconcat(b, formatted);
    bdestroy(formatted);

    if (!hasExponent && !hasDecimalPoint && !fpcheck(f))
        bcatcstr(b, ".0");
}
};

10
src/HLSLccTypes.natvis Normal file
View File

@ -0,0 +1,10 @@
<?xml version="1.0" encoding="utf-8"?>
<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
<Type Name="Instruction">
<DisplayString>{{ id={id} op={eOpcode} o0={asOperands[0]}, o1={asOperands[1]}}}</DisplayString>
</Type>
<Type Name="Operand">
<DisplayString>{{ type={eType}, reg={ui32RegisterNumber} }}</DisplayString>
</Type>
</AutoVisualizer>

351
src/Instruction.cpp Normal file
View File

@ -0,0 +1,351 @@
#include "internal_includes/Instruction.h"
#include "internal_includes/debug.h"
#include "include/ShaderInfo.h"
// Returns the single source operand whose swizzle determines the result of the
// instruction, or NULL if there is no such operand and all source operands carry
// their own swizzles (the caller then has to adjust every source operand).
//
// The opcodes fall into these groups:
//   - opcodes that are not expected to reach this function: assert, return NULL
//   - normal arithmetic opcodes: return NULL
//   - opcodes with one relevant source: return operand 1, 2 or 3 depending on the opcode
//   - anything not listed: assert, return NULL
static Operand *GetSrcSwizzleOperand(Instruction *psInst)
{
switch (psInst->eOpcode)
{
// These opcodes are not expected here; hitting one trips the assert.
case OPCODE_DP2:
case OPCODE_DP3:
case OPCODE_DP4:
case OPCODE_NOP:
case OPCODE_SWAPC:
case OPCODE_SAMPLE_C:
case OPCODE_SAMPLE_C_LZ:
ASSERT(0);
return NULL;
// Normal arithmetics, all srcs have swizzles
case OPCODE_ADD:
case OPCODE_AND:
case OPCODE_DERIV_RTX:
case OPCODE_DERIV_RTX_COARSE:
case OPCODE_DERIV_RTX_FINE:
case OPCODE_DERIV_RTY:
case OPCODE_DERIV_RTY_COARSE:
case OPCODE_DERIV_RTY_FINE:
case OPCODE_DIV:
case OPCODE_EQ:
case OPCODE_EXP:
case OPCODE_FRC:
case OPCODE_FTOI:
case OPCODE_FTOU:
case OPCODE_GE:
case OPCODE_IADD:
case OPCODE_IEQ:
case OPCODE_IGE:
case OPCODE_ILT:
case OPCODE_IMAD:
case OPCODE_IMAX:
case OPCODE_IMIN:
case OPCODE_IMUL:
case OPCODE_INE:
case OPCODE_INEG:
case OPCODE_ITOF:
case OPCODE_LOG:
case OPCODE_LT:
case OPCODE_MAD:
case OPCODE_MAX:
case OPCODE_MIN:
case OPCODE_MOV:
case OPCODE_MUL:
case OPCODE_NE:
case OPCODE_NOT:
case OPCODE_OR:
case OPCODE_ROUND_NE:
case OPCODE_ROUND_NI:
case OPCODE_ROUND_PI:
case OPCODE_ROUND_Z:
case OPCODE_RSQ:
case OPCODE_SINCOS:
case OPCODE_SQRT:
case OPCODE_UDIV:
case OPCODE_UGE:
case OPCODE_ULT:
case OPCODE_UMAD:
case OPCODE_UMAX:
case OPCODE_UMIN:
case OPCODE_UMUL:
case OPCODE_UTOF:
case OPCODE_XOR:
case OPCODE_BFI:
case OPCODE_BFREV:
case OPCODE_COUNTBITS:
case OPCODE_DADD:
case OPCODE_DDIV:
case OPCODE_DEQ:
case OPCODE_DFMA:
case OPCODE_DGE:
case OPCODE_DLT:
case OPCODE_DMAX:
case OPCODE_DMIN:
case OPCODE_DMUL:
case OPCODE_DMOV:
case OPCODE_DNE:
case OPCODE_DRCP:
case OPCODE_DTOF:
case OPCODE_F16TOF32:
case OPCODE_F32TOF16:
case OPCODE_FIRSTBIT_HI:
case OPCODE_FIRSTBIT_LO:
case OPCODE_FIRSTBIT_SHI:
case OPCODE_FTOD:
case OPCODE_IBFE:
case OPCODE_RCP:
case OPCODE_UADDC:
case OPCODE_UBFE:
case OPCODE_USUBB:
case OPCODE_MOVC:
case OPCODE_DMOVC:
return NULL;
// Special cases:
// For these opcodes operand 2 is the one returned.
case OPCODE_GATHER4:
case OPCODE_GATHER4_C:
case OPCODE_LD:
case OPCODE_LD_MS:
case OPCODE_LOD:
case OPCODE_LD_UAV_TYPED:
case OPCODE_LD_RAW:
case OPCODE_SAMPLE:
case OPCODE_SAMPLE_B:
case OPCODE_SAMPLE_L:
case OPCODE_SAMPLE_D:
case OPCODE_RESINFO:
return &psInst->asOperands[2];
// For these opcodes operand 3 is the one returned.
case OPCODE_GATHER4_PO:
case OPCODE_GATHER4_PO_C:
case OPCODE_LD_STRUCTURED:
return &psInst->asOperands[3];
// For the shift opcodes operand 1 is the one returned.
case OPCODE_ISHL:
case OPCODE_ISHR:
case OPCODE_USHR:
return &psInst->asOperands[1];
// Unknown opcode: flag it so that it gets classified above.
default:
ASSERT(0);
return NULL;
}
}
// Adjusts a source operand so that it still lines up with a destination whose write
// mask has been shifted down by `rebase` components.
//   - select-1 operands are left untouched
//   - swizzle operands have their swizzle entries shifted down by `rebase`
//   - mask-mode operands (and any other mode) are either immediates, whose values are
//     shifted down, or are converted into an explicit swizzle starting at `rebase`
static void DoSrcOperandRebase(Operand *psOperand, uint32_t rebase)
{
    // A single selected component does not depend on the destination mask.
    if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
        return;

    if (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
    {
        // Move entry `rebase` to slot 0, entry `rebase + 1` to slot 1, and so on.
        for (uint32_t from = rebase; from < 4; from++)
            psOperand->aui32Swizzle[from - rebase] = psOperand->aui32Swizzle[from];
        return;
    }

    // Mask mode, which is also the fallback for any unrecognised mode.
    ASSERT(psOperand->ui32CompMask == 0 || psOperand->ui32CompMask == OPERAND_4_COMPONENT_MASK_ALL);

    // Immediates do not have swizzles, so shift the stored values instead.
    if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32)
    {
        if (psOperand->iNumComponents > 1)
            std::copy(&psOperand->afImmediates[rebase], &psOperand->afImmediates[4], &psOperand->afImmediates[0]);
        return;
    }
    if (psOperand->eType == OPERAND_TYPE_IMMEDIATE64)
    {
        if (psOperand->iNumComponents > 1)
            std::copy(&psOperand->adImmediates[rebase], &psOperand->adImmediates[4], &psOperand->adImmediates[0]);
        return;
    }

    // Anything else becomes a swizzle that starts reading at component `rebase`.
    psOperand->eSelMode = OPERAND_4_COMPONENT_SWIZZLE_MODE;
    psOperand->ui32Swizzle = 0;

    uint32_t slot = 0;
    while (slot < 4 - rebase)
    {
        psOperand->aui32Swizzle[slot] = slot + rebase;
        slot++;
    }
    // Pad the remaining slots with the first actual input.
    while (slot < 4)
    {
        psOperand->aui32Swizzle[slot] = rebase;
        slot++;
    }
}
// Renames a temp register reference in one operand of this instruction from oldReg to
// newReg and, if rebase is non-zero, shifts the operand's component selection down by
// `rebase` components.
//
//   psOperand - the operand to update
//   oldReg    - temp register number to look for
//   newReg    - temp register number to write in its place
//   compMask  - the components being moved; the operand is only changed when every
//               component it accesses lies inside this mask
//   flags     - UD_CHANGE_SUBOPERANDS: also process the operand's suboperands
//               (recursively, with UD_CHANGE_ALL)
//               UD_CHANGE_MAIN_OPERAND: process psOperand itself
//   rebase    - number of components to shift the selection down by; 0 = rename only
//
// When the rebased operand is a destination of this instruction, the source operands
// are adjusted to match (see the opcode switch at the end).
void Instruction::ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase)
{
uint32_t i = 0;
uint32_t accessMask = 0;
int isDestination = 0;
Operand *psSwizzleOperand = NULL;
// Suboperands are handled first and independently of the main operand.
if (flags & UD_CHANGE_SUBOPERANDS)
{
for (i = 0; i < MAX_SUB_OPERANDS; i++)
{
if (psOperand->m_SubOperands[i].get())
ChangeOperandTempRegister(psOperand->m_SubOperands[i].get(), oldReg, newReg, compMask, UD_CHANGE_ALL, rebase);
}
}
// Nothing more to do unless the main operand was requested and it references oldReg as a temp.
if ((flags & UD_CHANGE_MAIN_OPERAND) == 0)
return;
if (psOperand->eType != OPERAND_TYPE_TEMP)
return;
if (psOperand->ui32RegisterNumber != oldReg)
return;
accessMask = psOperand->GetAccessMask();
// If this operation touches other components than the one(s) we're splitting, skip it
if ((accessMask & (~compMask)) != 0)
{
// Verify that we've not messed up in reachability analysis.
// This would mean that we've encountered an instruction that accesses
// a component in multi-component mode and we're supposed to treat it as single-use only.
// Now that we track operands we can bring this back
ASSERT((accessMask & compMask) == 0);
return;
}
#if 0
printf("Updating operand %d with access mask %X\n", (int)psOperand->id, accessMask);
#endif
psOperand->ui32RegisterNumber = newReg;
// Without a rebase this is a plain rename and we are done.
if (rebase == 0)
return;
// Update component mask. Note that we don't need to do anything to the suboperands. They do not affect destination writemask.
switch (psOperand->eSelMode)
{
case OPERAND_4_COMPONENT_MASK_MODE:
{
uint32_t oldMask = psOperand->ui32CompMask;
// A zero mask stands for all components here.
if (oldMask == 0)
oldMask = OPERAND_4_COMPONENT_MASK_ALL;
// Check that we're not losing any information
ASSERT((oldMask >> rebase) << rebase == oldMask);
psOperand->ui32CompMask = (oldMask >> rebase);
break;
}
case OPERAND_4_COMPONENT_SELECT_1_MODE:
ASSERT(psOperand->aui32Swizzle[0] >= rebase);
psOperand->aui32Swizzle[0] -= rebase;
break;
case OPERAND_4_COMPONENT_SWIZZLE_MODE:
{
for (i = 0; i < 4; i++)
{
// Note that this rebase is different from the one done for source operands
ASSERT(psOperand->aui32Swizzle[i] >= rebase);
psOperand->aui32Swizzle[i] -= rebase;
}
break;
}
default:
ASSERT(0);
}
// Tweak operand datatypes
// (shift the per-component data types down by `rebase` as well)
std::copy(&psOperand->aeDataType[rebase], &psOperand->aeDataType[4], &psOperand->aeDataType[0]);
// If this operand is a destination, we'll need to tweak sources as well
// (operands with an index below ui32FirstSrc are the destinations)
for (i = 0; i < ui32FirstSrc; i++)
{
if (psOperand == &asOperands[i])
{
isDestination = 1;
break;
}
}
if (isDestination == 0)
return;
// Nasty corner case of 2 destinations, not supported if both targets are written
ASSERT((ui32FirstSrc < 2) || (asOperands[0].eType == OPERAND_TYPE_NULL) || (asOperands[1].eType == OPERAND_TYPE_NULL));
// If we made it this far, we're rebasing a destination temp (and the only destination), need to tweak sources depending on the instruction
switch (eOpcode)
{
// The opcodes that do not need tweaking:
case OPCODE_DP2:
case OPCODE_DP3:
case OPCODE_DP4:
case OPCODE_BUFINFO:
case OPCODE_SAMPLE_C:
case OPCODE_SAMPLE_C_LZ:
return;
default:
psSwizzleOperand = GetSrcSwizzleOperand(this); // Null means tweak all source operands
if (psSwizzleOperand)
{
// Only one source operand determines the result layout.
DoSrcOperandRebase(psSwizzleOperand, rebase);
return;
}
else
{
// Every source operand has to follow the destination.
for (i = ui32FirstSrc; i < ui32NumOperands; i++)
{
DoSrcOperandRebase(&asOperands[i], rebase);
}
}
return;
}
}
// Returns nonzero if psInst is a sample instruction and the sampler has medium or low precision
bool Instruction::IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const
{
const Operand *op;
const ResourceBinding *psBinding = NULL;
OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT;
switch (eOpcode)
{
default:
return false;
case OPCODE_SAMPLE:
case OPCODE_SAMPLE_B:
case OPCODE_SAMPLE_L:
case OPCODE_SAMPLE_D:
case OPCODE_SAMPLE_C:
case OPCODE_SAMPLE_C_LZ:
break;
}
op = &asOperands[3];
ASSERT(op->eType == OPERAND_TYPE_SAMPLER);
info.GetResourceFromBindingPoint(RGROUP_SAMPLER, op->ui32RegisterNumber, &psBinding);
if (!psBinding)
{
/* Try to look from texture group */
info.GetResourceFromBindingPoint(RGROUP_TEXTURE, op->ui32RegisterNumber, &psBinding);
}
sType = Operand::ResourcePrecisionToOperandPrecision(psBinding ? psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN);
if (sType == OPERAND_MIN_PRECISION_DEFAULT)
return false;
if (pType)
*pType = sType;
return true;
}

363
src/LoopTransform.cpp Normal file
View File

@ -0,0 +1,363 @@
#include "src/internal_includes/LoopTransform.h"
#include "src/internal_includes/Shader.h"
#include "src/internal_includes/debug.h"
#include <algorithm>
#include <vector>
#include <list>
namespace HLSLcc
{
// Describes one loop found in a shader phase. Entries with m_IsSwitch set stand for a
// SWITCH/ENDSWITCH region and only exist as helpers on the parsing stack.
struct LoopInfo
{
    LoopInfo()
        : m_StartLoop(nullptr)
        , m_EndLoop(nullptr)
        , m_ExitPoints()
        , m_IsSwitch(false)
    {
    }

    // The OPCODE_LOOP instruction that opens the loop.
    Instruction *m_StartLoop;
    // The OPCODE_ENDLOOP instruction that matches m_StartLoop.
    Instruction *m_EndLoop;
    // Exit instructions found at the same loop depth (BuildLoopInfo records BREAK and BREAKC).
    std::vector<Instruction *> m_ExitPoints;
    // True if this entry is a switch-case and not a LOOP/ENDLOOP pair.
    bool m_IsSwitch;
};
using Loops = std::list<LoopInfo>;
// Builds the list of all LOOP/ENDLOOP pairs in the given shader phase.
//   phase - the phase whose instructions are scanned in order
//   res   - cleared first, then filled with one LoopInfo per OPCODE_LOOP, in the order
//           the loops start. Each entry gets its start and end instruction and the
//           BREAK/BREAKC instructions found directly inside it.
// SWITCH/ENDSWITCH regions are tracked on the same stack so that breaks belonging to a
// switch are not attributed to the surrounding loop, but they are never added to res.
void BuildLoopInfo(ShaderPhase &phase, Loops &res)
{
    res.clear();

    // A stack of the currently open loops/switches, innermost at the front.
    // The elements live in res or in dummyLIForSwitches.
    std::list<LoopInfo *> loopStack;

    // Storage for dummy LoopInfo elements to be used for switch-cases.
    // We don't want them cluttering the Loops list so store them here.
    std::list<LoopInfo> dummyLIForSwitches;

    // Iterate the container itself. The previous pointer walk took the address of
    // psInst[0] and dereferenced end(), both of which are undefined behaviour (the
    // former when the phase has no instructions).
    for (Instruction &inst : phase.psInst)
    {
        Instruction *i = &inst;

        if (i->eOpcode == OPCODE_LOOP)
        {
            res.push_back(LoopInfo());
            LoopInfo *currLoopInfo = &res.back();
            currLoopInfo->m_StartLoop = i;
            loopStack.push_front(currLoopInfo);
        }
        else if (i->eOpcode == OPCODE_ENDLOOP)
        {
            ASSERT(!loopStack.empty());
            LoopInfo *li = loopStack.front();
            loopStack.pop_front();
            li->m_EndLoop = i;
        }
        else if (i->eOpcode == OPCODE_SWITCH)
        {
            // Create a dummy entry into the stack
            dummyLIForSwitches.push_back(LoopInfo());
            LoopInfo *li = &dummyLIForSwitches.back();
            li->m_IsSwitch = true;
            loopStack.push_front(li);
        }
        else if (i->eOpcode == OPCODE_ENDSWITCH)
        {
            ASSERT(!loopStack.empty());
            LoopInfo *li = loopStack.front();
            loopStack.pop_front();
            ASSERT(li->m_IsSwitch);
        }
        else if (i->eOpcode == OPCODE_BREAK || i->eOpcode == OPCODE_BREAKC)
        {
            // Get the current loopstack head
            ASSERT(!loopStack.empty());
            LoopInfo *li = loopStack.front();
            // Ignore breaks from switch-cases
            if (!li->m_IsSwitch)
            {
                li->m_ExitPoints.push_back(i);
            }
        }
    }
}
// Returns true if the given instruction is a non-vectorized int or uint comparison instruction that reads from at least one temp and writes to a temp
static bool IsScalarTempComparisonInstruction(const Instruction *i)
{
switch (i->eOpcode)
{
default:
return false;
case OPCODE_IGE:
case OPCODE_ILT:
case OPCODE_IEQ:
case OPCODE_INE:
case OPCODE_UGE:
case OPCODE_ULT:
break;
}
if (i->asOperands[0].eType != OPERAND_TYPE_TEMP)
return false;
int tempOp = -1;
if (i->asOperands[1].eType == OPERAND_TYPE_TEMP)
tempOp = 1;
else if (i->asOperands[2].eType == OPERAND_TYPE_TEMP)
tempOp = 2;
// Also reject comparisons where we compare temp.x vs temp.y
if (i->asOperands[1].eType == OPERAND_TYPE_TEMP && i->asOperands[2].eType == OPERAND_TYPE_TEMP && i->asOperands[1].ui32RegisterNumber == i->asOperands[2].ui32RegisterNumber)
return false;
if (tempOp == -1)
return false;
if (i->asOperands[0].GetNumSwizzleElements() != 1)
return false;
return true;
}
// Returns true iff both instructions perform identical operation. For the purposes of Loop transformation, we only consider operations of type tX = tX <op> imm32
static bool AreInstructionsIdentical(const Instruction *a, const Instruction *b)
{
if (a->eOpcode != b->eOpcode)
return false;
ASSERT(a->ui32NumOperands == b->ui32NumOperands);
uint32_t dstReg = 0;
if (a->asOperands[0].eType != OPERAND_TYPE_TEMP)
return false;
dstReg = a->asOperands[0].ui32RegisterNumber;
for (uint32_t i = 0; i < a->ui32NumOperands; i++)
{
const Operand &aop = a->asOperands[i];
const Operand &bop = b->asOperands[i];
if (aop.eType != bop.eType)
return false;
if (aop.GetAccessMask() != bop.GetAccessMask())
return false;
if (aop.GetNumSwizzleElements() != 1)
return false;
if (aop.eType == OPERAND_TYPE_TEMP)
{
if (aop.ui32RegisterNumber != bop.ui32RegisterNumber)
return false;
if (aop.ui32RegisterNumber != dstReg)
return false;
}
else if (aop.eType == OPERAND_TYPE_IMMEDIATE32)
{
if (memcmp(aop.afImmediates, bop.afImmediates, 4 * sizeof(float)) != 0)
return false;
}
}
return true;
}
// Attempt to transform a single loop into a for-statement.
//   phase - the shader phase the loop belongs to; a new temp register number is
//           allocated from it when the initializer can be folded into the for header
//   li    - the loop to examine; m_StartLoop and m_EndLoop must be set
//
// On success the LOOP instruction gets its m_LoopInductors filled in
// ([0] initializer or NULL, [1] comparison, [2] breakc, [3] increment) and those
// instructions are marked with m_SkipTranslation. On failure nothing is modified.
static void AttemptLoopTransform(ShaderPhase &phase, LoopInfo &li)
{
    // In order to transform a loop into a for, the following has to hold:
    // - The loop must start with a comparison instruction where one of the src operands is a temp (induction variable), followed by OPCODE_BREAKC.
    // - The loop must end with an integer add (IADD) where the dest operand is the same temp as one of the sources in the comparison instruction above
    // Additionally, if the loop induction variable is initialized before the start of the loop and it has only uses inside the LOOP/ENDLOOP pair, we can declare that inside the for statement.
    // Also, the loop induction variable must be standalone (as in, never used as part of a larger vector)
    Instruction *cmpInst = li.m_StartLoop + 1;
    if (!IsScalarTempComparisonInstruction(cmpInst))
        return;

    Instruction *breakInst = li.m_StartLoop + 2;
    if (breakInst->eOpcode != OPCODE_BREAKC)
        return;
    if (breakInst->asOperands[0].eType != OPERAND_TYPE_TEMP)
        return;
    if (breakInst->asOperands[0].ui32RegisterNumber != cmpInst->asOperands[0].ui32RegisterNumber)
        return;

    // Check that the comparison result isn't used anywhere else
    if (cmpInst->m_Uses.size() != 1)
        return;
    ASSERT(cmpInst->m_Uses[0].m_Inst == breakInst);

    // Ok, at least we have the comparison + breakc combo at top. Try to find the induction variable
    uint32_t inductionVarIdx = 0;
    Instruction *lastInst = li.m_EndLoop - 1;
    if (lastInst->eOpcode != OPCODE_IADD)
        return;
    if (lastInst->asOperands[0].eType != OPERAND_TYPE_TEMP)
        return;
    if (lastInst->asOperands[0].GetNumSwizzleElements() != 1)
        return;

    uint32_t indVar = lastInst->asOperands[0].ui32RegisterNumber;
    // Verify that the induction variable actually matches.
    if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == indVar)
        inductionVarIdx = 1;
    else if (cmpInst->asOperands[2].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[2].ui32RegisterNumber == indVar)
        inductionVarIdx = 2;
    else
        return;

    // Verify that we also read from the induction variable in the last instruction
    if (!((lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == indVar) ||
          (lastInst->asOperands[2].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[2].ui32RegisterNumber == indVar)))
        return;

    // Nvidia compiler bug workaround: The shader compiler tries to be smart and unrolls constant loops,
    // but then fails miserably if the loop variable is used as an index to UAV loads/stores or some other cases ("array access too complex")
    // This is also triggered when the driver optimizer sees "simple enough" arithmetics (whatever that is) done on the loop variable before indexing.
    // So, disable for-loop transformation altogether whenever we see a UAV load or store inside a loop.
    for (auto itr = li.m_StartLoop; itr != li.m_EndLoop; itr++)
    {
        switch (itr->eOpcode)
        {
            case OPCODE_LD_RAW:
            case OPCODE_LD_STRUCTURED:
            case OPCODE_LD_UAV_TYPED:
            case OPCODE_STORE_RAW:
            case OPCODE_STORE_STRUCTURED:
            case OPCODE_STORE_UAV_TYPED:
                return; // Nope, can't do a for, not even a partial one.
            default:
                break;
        }
    }

    // One more thing to check: The comparison input may only see 1 definition that originates from inside the loop range: the one in lastInst.
    // Anything else means that there's a continue statement, or another break/breakc and that means that lastInst wouldn't get called.
    // Of course, if all those instructions are identical, then it's fine.
    // Ideally, if there's only one definition that's from outside the loop range, then we can use that as the initializer, as well.
    Instruction *initializer = NULL;
    std::vector<const Operand::Define *> definitionsOutsideRange;
    std::vector<const Operand::Define *> definitionsInsideRange;
    for (const Operand::Define &def : cmpInst->asOperands[inductionVarIdx].m_Defines)
    {
        if (def.m_Inst < li.m_StartLoop || def.m_Inst > li.m_EndLoop)
            definitionsOutsideRange.push_back(&def);
        else
            definitionsInsideRange.push_back(&def);
    }

    // Without any definition inside the loop, the compared value is not updated by
    // lastInst, so this is not an induction variable we can handle. This also keeps
    // the begin() + 1 / [0] accesses below within bounds.
    if (definitionsInsideRange.empty())
        return;

    if (definitionsInsideRange.size() != 1)
    {
        // All definitions must be identical
        for (std::vector<const Operand::Define*>::iterator itr = definitionsInsideRange.begin() + 1; itr != definitionsInsideRange.end(); itr++)
        {
            if (!AreInstructionsIdentical((*itr)->m_Inst, definitionsInsideRange[0]->m_Inst))
                return;
        }
    }

    ASSERT(definitionsOutsideRange.size() > 0);
    if (definitionsOutsideRange.size() == 1)
        initializer = definitionsOutsideRange[0]->m_Inst;

    // Initializer must only write to one component
    if (initializer && initializer->asOperands[0].GetNumSwizzleElements() != 1)
        initializer = NULL;

    // Check that the initializer is only used within the range so we can move it to for statement
    if (initializer)
    {
        bool hasUsesOutsideRange = false;
        for (const Instruction::Use &u : initializer->m_Uses)
        {
            if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop)
                hasUsesOutsideRange = true;
        }
        // Has outside uses? we cannot pull that up to the for statement
        if (hasUsesOutsideRange)
            initializer = NULL;
    }

    // Check that the loop adder instruction only has uses inside the loop range, otherwise we cannot move the initializer either
    if (initializer)
    {
        bool cannotDoInitializer = false;
        for (auto itr = lastInst->m_Uses.begin(); itr != lastInst->m_Uses.end(); itr++)
        {
            const Instruction::Use &u = *itr;
            if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop)
            {
                cannotDoInitializer = true;
                break;
            }
            // Also check that the uses are not vector ops (temp splitting has already pulled everything to .x if this is a standalone var)
            if (u.m_Op->GetAccessMask() != 1)
            {
                cannotDoInitializer = true;
                break;
            }
        }
        // Has outside uses? we cannot pull that up to the for statement
        if (cannotDoInitializer)
            initializer = NULL;
    }

    if (initializer)
    {
        // We can declare the initializer in the for loop header, allocate a new number for it and change all uses into that.
        uint32_t newRegister = phase.m_NextFreeTempRegister++;
        li.m_StartLoop->m_InductorRegister = newRegister;
        for (const Instruction::Use &u : initializer->m_Uses)
        {
            u.m_Op->m_ForLoopInductorName = newRegister;
        }

        // Also tweak the sources of cmpInst and lastInst that read the induction variable, plus both destinations
        if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber)
            cmpInst->asOperands[1].m_ForLoopInductorName = newRegister;
        else
            cmpInst->asOperands[2].m_ForLoopInductorName = newRegister;

        if (lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber)
            lastInst->asOperands[1].m_ForLoopInductorName = newRegister;
        else
            lastInst->asOperands[2].m_ForLoopInductorName = newRegister;

        lastInst->asOperands[0].m_ForLoopInductorName = newRegister;
        initializer->asOperands[0].m_ForLoopInductorName = newRegister;
    }

    // This loop can be transformed to for-loop. Do the necessary magicks.
    li.m_StartLoop->m_LoopInductors[0] = initializer;
    li.m_StartLoop->m_LoopInductors[1] = cmpInst;
    li.m_StartLoop->m_LoopInductors[2] = breakInst;
    li.m_StartLoop->m_LoopInductors[3] = lastInst;

    // The for header now stands in for these instructions.
    if (initializer)
        initializer->m_SkipTranslation = true;
    cmpInst->m_SkipTranslation = true;
    breakInst->m_SkipTranslation = true;
    lastInst->m_SkipTranslation = true;
}
// Finds every LOOP/ENDLOOP pair in the phase and tries to turn each one into a
// for-statement.
void DoLoopTransform(ShaderPhase &phase)
{
    Loops foundLoops;
    BuildLoopInfo(phase, foundLoops);

    for (LoopInfo &loop : foundLoops)
    {
        // Sanity checks: both end points are known and the loop body holds at least
        // two instructions...
        ASSERT(loop.m_StartLoop != 0);
        ASSERT(loop.m_EndLoop != 0);
        ASSERT(loop.m_EndLoop > loop.m_StartLoop + 2);
        // ...switches never end up in the loop list, and every loop has an exit point.
        ASSERT(!loop.m_IsSwitch);
        ASSERT(!loop.m_ExitPoints.empty());

        AttemptLoopTransform(phase, loop);
    }
}
};

586
src/Operand.cpp Normal file
View File

@ -0,0 +1,586 @@
#include "internal_includes/Operand.h"
#include "internal_includes/debug.h"
#include "internal_includes/HLSLccToolkit.h"
#include "internal_includes/Shader.h"
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/Instruction.h"
// Returns a bitmask of the components this operand accesses (bit n = component n).
//   - swizzle mode:  the union of the four swizzle entries
//   - select-1 mode: the single selected component
//   - mask mode and any other mode: the component mask, with 0 meaning all components
// The result is asserted to be non-zero.
uint32_t Operand::GetAccessMask() const
{
    // TODO: Destination writemask can (AND DOES) affect access from sources, but do it conservatively for now.
    uint32_t accessMask = 0;

    if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
    {
        for (int comp = 0; comp < 4; comp++)
            accessMask |= 1 << (aui32Swizzle[comp]);
    }
    else if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
    {
        accessMask = 1 << (aui32Swizzle[0]);
    }
    else
    {
        // Mask mode (also the fallback): an empty mask stands for everything.
        accessMask = ui32CompMask;
        if (accessMask == 0)
            accessMask = OPERAND_4_COMPONENT_MASK_ALL;
    }

    ASSERT(accessMask != 0);
    return accessMask;
}
// Returns the 1-based index of the highest component this operand refers to
// (1 = x ... 4 = w). Operands without an enabled write mask, operands that do not have
// four components, and empty or full component masks all report 4.
int Operand::GetMaxComponent() const
{
    if (!iWriteMaskEnabled || iNumComponents != 4)
        return 4;

    // Component mask: the highest bit set decides.
    if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE)
    {
        const bool isFullMask = ui32CompMask == (OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z | OPERAND_4_COMPONENT_MASK_W);
        if (ui32CompMask == 0 || isFullMask)
            return 4;

        if (ui32CompMask & OPERAND_4_COMPONENT_MASK_W)
            return 4;
        if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Z)
            return 3;
        if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Y)
            return 2;
        if (ui32CompMask & OPERAND_4_COMPONENT_MASK_X)
            return 1;
        return 4;
    }

    // Component swizzle: the largest swizzle entry decides.
    if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
    {
        if (ui32Swizzle == NO_SWIZZLE)
            return 4;

        uint32_t highest = 0;
        for (int comp = 0; comp < 4; comp++)
        {
            if (aui32Swizzle[comp] > highest)
                highest = aui32Swizzle[comp];
        }
        return (int)highest + 1;
    }

    // Component select 1
    if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
        return 1;

    return 4;
}
// Returns true if the operand's swizzle repeats one single component four times,
// e.g. .wwww. Only four-component operands in swizzle mode with the write mask enabled
// can qualify.
bool Operand::IsSwizzleReplicated() const
{
    const bool isFourComponentSwizzle =
        iWriteMaskEnabled &&
        iNumComponents == 4 &&
        eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE;
    if (!isFourComponentSwizzle)
        return false;

    return ui32Swizzle == WWWW_SWIZZLE ||
           ui32Swizzle == ZZZZ_SWIZZLE ||
           ui32Swizzle == YYYY_SWIZZLE ||
           ui32Swizzle == XXXX_SWIZZLE;
}
// Get the number of elements returned by operand, taking additional component mask into account
//   _ui32CompMask - extra component mask (bit n = component n) that is combined with the
//                   operand's own selection; defaults to all four components.
// Returns the number of components that remain selected.
// NOTE: although this method is const, it forces iNumComponents to 3 for the thread-ID
// style operand types (see the first switch below).
uint32_t Operand::GetNumSwizzleElements(uint32_t _ui32CompMask /* = OPERAND_4_COMPONENT_MASK_ALL */) const
{
uint32_t count = 0;
// Operand types that do not go through the generic mask/swizzle logic.
switch (eType)
{
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED:
return 1; // TODO: does mask make any sense here?
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
case OPERAND_TYPE_INPUT_THREAD_ID:
case OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
// Adjust component count and break to more processing
// (casts away const to modify this operand)
((Operand *)this)->iNumComponents = 3;
break;
case OPERAND_TYPE_IMMEDIATE32:
case OPERAND_TYPE_IMMEDIATE64:
case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL:
case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL:
case OPERAND_TYPE_OUTPUT_DEPTH:
{
// Translate numComponents into bitmask
// 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15
uint32_t compMask = (1 << iNumComponents) - 1;
compMask &= _ui32CompMask;
// Calculate bits left in compMask
return HLSLcc::GetNumberBitsSet(compMask);
}
default:
{
break;
}
}
// Generic path: only applies to multi-component operands with the write mask enabled.
if (iWriteMaskEnabled &&
iNumComponents != 1)
{
//Component Mask
if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE)
{
uint32_t compMask = ui32CompMask;
// An empty mask stands for all components.
if (compMask == 0)
compMask = OPERAND_4_COMPONENT_MASK_ALL;
compMask &= _ui32CompMask;
if (compMask == OPERAND_4_COMPONENT_MASK_ALL)
return 4;
// Otherwise count the x/y/z/w bits that survived the extra mask.
if (compMask & OPERAND_4_COMPONENT_MASK_X)
{
count++;
}
if (compMask & OPERAND_4_COMPONENT_MASK_Y)
{
count++;
}
if (compMask & OPERAND_4_COMPONENT_MASK_Z)
{
count++;
}
if (compMask & OPERAND_4_COMPONENT_MASK_W)
{
count++;
}
}
else
//Component Swizzle
if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
{
// Each of the four swizzle slots enabled in the extra mask counts as one element;
// the swizzle values themselves are not looked at.
uint32_t i;
for (i = 0; i < 4; ++i)
{
if ((_ui32CompMask & (1 << i)) == 0)
continue;
count++;
}
}
else
//Component Select 1
if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
{
// Counts 1 if the single selected component is enabled in the extra mask.
if (aui32Swizzle[0] == OPERAND_4_COMPONENT_X && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_X))
{
count++;
}
else
if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Y && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Y))
{
count++;
}
else
if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Z && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Z))
{
count++;
}
else
if (aui32Swizzle[0] == OPERAND_4_COMPONENT_W && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_W))
{
count++;
}
}
}
// Nothing was counted above (generic path skipped, or nothing selected):
// fall back to the operand's component count combined with the extra mask.
if (!count)
{
// Translate numComponents into bitmask
// 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15
uint32_t compMask = (1 << iNumComponents) - 1;
compMask &= _ui32CompMask;
// Calculate bits left in compMask
return HLSLcc::GetNumberBitsSet(compMask);
}
return count;
}
// Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch.
// Only hull and domain shaders can yield 1; within those, the hull control point phase,
// domain shader outputs and control point operands are per-vertex.
int Operand::GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const
{
    const bool isHull = (eShaderType == HULL_SHADER);
    const bool isDomain = (eShaderType == DOMAIN_SHADER);

    // Other shader stages do not have per-patch registers.
    if (!isHull && !isDomain)
        return 0;

    const bool isPerVertex =
        (isHull && eShaderPhaseType == HS_CTRL_POINT_PHASE) ||
        (isDomain && eType == OPERAND_TYPE_OUTPUT) ||
        eType == OPERAND_TYPE_INPUT_CONTROL_POINT ||
        eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT;

    return isPerVertex ? 0 : 1;
}
// Convenience overload: determines the register space from the shader type and the
// type of the phase currently being processed in the given context.
int Operand::GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const
{
    const auto &shader = *psContext->psShader;
    const auto &activePhase = shader.asPhases[psContext->currentPhase];
    return GetRegisterSpace(shader.eShaderType, activePhase.ePhase);
}
SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates /* = SVT_INT */) const
{
// The min precision qualifier overrides all of the stuff below
switch (eMinPrecision)
{
case OPERAND_MIN_PRECISION_FLOAT_16:
return SVT_FLOAT16;
case OPERAND_MIN_PRECISION_FLOAT_2_8:
return SVT_FLOAT10;
case OPERAND_MIN_PRECISION_SINT_16:
return SVT_INT16;
case OPERAND_MIN_PRECISION_UINT_16:
return SVT_UINT16;
default:
break;
}
switch (eType)
{
case OPERAND_TYPE_TEMP:
{
SHADER_VARIABLE_TYPE eCurrentType;
int i = 0;
if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
{
return aeDataType[aui32Swizzle[0]];
}
if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
{
if (ui32Swizzle == (NO_SWIZZLE))
{
return aeDataType[0];
}
return aeDataType[aui32Swizzle[0]];
}
if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE)
{
uint32_t mask = ui32CompMask;
if (!mask)
{
mask = OPERAND_4_COMPONENT_MASK_ALL;
}
for (; i < 4; ++i)
{
if (mask & (1 << i))
{
eCurrentType = aeDataType[i];
break;
}
}
#ifdef _DEBUG
//Check if all elements have the same basic type.
for (; i < 4; ++i)
{
if (mask & (1 << i))
{
if (eCurrentType != aeDataType[i])
{
ASSERT(0);
}
}
}
#endif
return eCurrentType;
}
ASSERT(0);
break;
}
case OPERAND_TYPE_OUTPUT:
{
const uint32_t ui32Register = ui32RegisterNumber;
int regSpace = GetRegisterSpace(psContext);
const ShaderInfo::InOutSignature* psOut = NULL;
if (regSpace == 0)
psContext->psShader->sInfo.GetOutputSignatureFromRegister(ui32Register, GetAccessMask(), psContext->psShader->ui32CurrentVertexOutputStream,
&psOut);
else
psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psOut);
ASSERT(psOut != NULL);
if (psOut->eMinPrec != MIN_PRECISION_DEFAULT)
{
switch (psOut->eMinPrec)
{
default:
ASSERT(0);
break;
case MIN_PRECISION_FLOAT_16:
return SVT_FLOAT16;
case MIN_PRECISION_FLOAT_2_8:
if (psContext->psShader->eTargetLanguage == LANG_METAL)
return SVT_FLOAT16;
else
return SVT_FLOAT10;
case MIN_PRECISION_SINT_16:
return SVT_INT16;
case MIN_PRECISION_UINT_16:
return SVT_UINT16;
}
}
if (psOut->eComponentType == INOUT_COMPONENT_UINT32)
{
return SVT_UINT;
}
else if (psOut->eComponentType == INOUT_COMPONENT_SINT32)
{
return SVT_INT;
}
return SVT_FLOAT;
break;
}
case OPERAND_TYPE_INPUT:
{
const uint32_t ui32Register = aui32ArraySizes[iIndexDims - 1];
int regSpace = GetRegisterSpace(psContext);
const ShaderInfo::InOutSignature* psIn = NULL;
if (regSpace == 0)
{
if (psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[ui32Register] != 0)
return SVT_FLOAT; // All combined inputs are stored as floats
psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Register, GetAccessMask(),
&psIn);
}
else
{
if (psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[ui32Register] != 0)
return SVT_FLOAT; // All combined inputs are stored as floats
psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psIn);
}
ASSERT(psIn != NULL);
switch (eSpecialName)
{
//UINT in DX, INT in GL.
case NAME_PRIMITIVE_ID:
case NAME_VERTEX_ID:
case NAME_INSTANCE_ID:
case NAME_RENDER_TARGET_ARRAY_INDEX:
case NAME_VIEWPORT_ARRAY_INDEX:
case NAME_SAMPLE_INDEX:
return SVT_INT;
case NAME_IS_FRONT_FACE:
return SVT_UINT;
case NAME_POSITION:
case NAME_CLIP_DISTANCE:
return SVT_FLOAT;
default:
break;
// fall through
}
if (psIn->eSystemValueType == NAME_IS_FRONT_FACE)
return SVT_UINT;
if (eSpecialName == NAME_PRIMITIVE_ID || eSpecialName == NAME_VERTEX_ID)
{
return SVT_INT;
}
//UINT in DX, INT in GL.
if (psIn->eSystemValueType == NAME_INSTANCE_ID ||
psIn->eSystemValueType == NAME_PRIMITIVE_ID ||
psIn->eSystemValueType == NAME_VERTEX_ID ||
psIn->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX ||
psIn->eSystemValueType == NAME_VIEWPORT_ARRAY_INDEX ||
psIn->eSystemValueType == NAME_SAMPLE_INDEX
)
{
return SVT_INT;
}
if (psIn->eMinPrec != MIN_PRECISION_DEFAULT)
{
switch (psIn->eMinPrec)
{
default:
ASSERT(0);
break;
case MIN_PRECISION_FLOAT_16:
return SVT_FLOAT16;
case MIN_PRECISION_FLOAT_2_8:
if (psContext->psShader->eTargetLanguage == LANG_METAL)
return SVT_FLOAT16;
else
return SVT_FLOAT10;
case MIN_PRECISION_SINT_16:
return SVT_INT16;
case MIN_PRECISION_UINT_16:
return SVT_UINT16;
}
}
if (psIn->eComponentType == INOUT_COMPONENT_UINT32)
{
return SVT_UINT;
}
else if (psIn->eComponentType == INOUT_COMPONENT_SINT32)
{
return SVT_INT;
}
return SVT_FLOAT;
break;
}
case OPERAND_TYPE_CONSTANT_BUFFER:
{
const ConstantBuffer* psCBuf = NULL;
const ShaderVarType* psVarType = NULL;
int32_t rebase = -1;
bool isArray;
int foundVar;
psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, aui32ArraySizes[0], &psCBuf);
if (psCBuf)
{
foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags);
if (foundVar && m_SubOperands[1].get() == NULL) // TODO: why this suboperand thing?
{
return psVarType->Type;
}
}
else
{
// Todo: this isn't correct yet.
return SVT_FLOAT;
}
break;
}
case OPERAND_TYPE_IMMEDIATE32:
{
return ePreferredTypeForImmediates;
}
case OPERAND_TYPE_IMMEDIATE64:
{
return SVT_DOUBLE;
}
case OPERAND_TYPE_INPUT_THREAD_ID:
case OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED:
{
return SVT_UINT;
}
case OPERAND_TYPE_SPECIAL_ADDRESS:
case OPERAND_TYPE_SPECIAL_LOOPCOUNTER:
case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID:
case OPERAND_TYPE_INPUT_PRIMITIVEID:
{
return SVT_INT;
}
case OPERAND_TYPE_INPUT_GS_INSTANCE_ID:
{
return SVT_UINT;
}
case OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
{
return SVT_INT;
}
case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
{
return SVT_INT;
}
case OPERAND_TYPE_INDEXABLE_TEMP: // Indexable temps are always floats
case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: // So are const arrays currently
default:
{
return SVT_FLOAT;
}
}
return SVT_FLOAT;
}
// Translates a reflected resource precision into the operand min-precision enum.
// HIGHP -> DEFAULT, MEDIUMP -> FLOAT_16; every other value (LOWP, UNKNOWN and
// anything unrecognised) -> FLOAT_2_8.
OPERAND_MIN_PRECISION Operand::ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec)
{
    if (ePrec == REFLECT_RESOURCE_PRECISION_HIGHP)
        return OPERAND_MIN_PRECISION_DEFAULT;

    if (ePrec == REFLECT_RESOURCE_PRECISION_MEDIUMP)
        return OPERAND_MIN_PRECISION_FLOAT_16;

    // LOWP, UNKNOWN and any other value share the lowest precision.
    return OPERAND_MIN_PRECISION_FLOAT_2_8;
}
int Operand::GetNumInputElements(const HLSLCrossCompilerContext *psContext) const
{
const ShaderInfo::InOutSignature *psSig = NULL;
int regSpace = GetRegisterSpace(psContext);
switch (eType)
{
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED:
case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID:
case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
return 1;
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
case OPERAND_TYPE_INPUT_THREAD_ID:
case OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
case OPERAND_TYPE_INPUT_DOMAIN_POINT:
return 3;
default:
break;
}
if (regSpace == 0)
psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig);
else
psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig);
ASSERT(psSig != NULL);
// TODO: Are there ever any cases where the mask has 'holes'?
return HLSLcc::GetNumberBitsSet(psSig->ui32Mask);
}

1018
src/Shader.cpp Normal file

File diff suppressed because it is too large Load Diff

387
src/ShaderInfo.cpp Normal file
View File

@ -0,0 +1,387 @@
#include "ShaderInfo.h"
#include "internal_includes/debug.h"
#include "internal_includes/tokens.h"
#include "Operand.h"
#include <stdlib.h>
#include <sstream>
// Looks up the texture-group resource bound at register regNo and returns its data type.
// The binding is expected to exist; a miss only trips the ASSERT.
SHADER_VARIABLE_TYPE ShaderInfo::GetTextureDataType(uint32_t regNo)
{
    const ResourceBinding* psTextureBinding = 0;
    const int wasFound = GetResourceFromBindingPoint(RGROUP_TEXTURE, regNo, &psTextureBinding);
    ASSERT(wasFound != 0);
    return psTextureBinding->GetDataType();
}
// Resolves a (resource group, bind point) pair to a constant buffer through
// aui32ResourceMap and writes its address to *ppsConstBuf.
void ShaderInfo::GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const
{
    ASSERT(ui32MajorVersion > 3);

    const auto bufferIndex = aui32ResourceMap[eGroup][ui32BindPoint];
    *ppsConstBuf = &psConstantBuffers[bufferIndex];
}
int ShaderInfo::GetResourceFromBindingPoint(const ResourceGroup eGroup, uint32_t const ui32BindPoint, const ResourceBinding** ppsOutBinding) const
{
size_t i;
const size_t ui32NumBindings = psResourceBindings.size();
const ResourceBinding* psBindings = &psResourceBindings[0];
for (i = 0; i < ui32NumBindings; ++i)
{
if (ResourceTypeToResourceGroup(psBindings[i].eType) == eGroup)
{
if (ui32BindPoint >= psBindings[i].ui32BindPoint && ui32BindPoint < (psBindings[i].ui32BindPoint + psBindings[i].ui32BindCount))
{
*ppsOutBinding = psBindings + i;
return 1;
}
}
}
return 0;
}
// Searches the variables of psThisPointerConstBuffer for the one whose
// [ui32StartOffset, ui32StartOffset + ui32Size) range contains ui32Offset.
// Stores it in *ppsShaderVar and returns 1 on success, returns 0 otherwise.
int ShaderInfo::GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const
{
    for (auto &candidate : psThisPointerConstBuffer->asVars)
    {
        const bool startsAtOrBefore = ui32Offset >= candidate.ui32StartOffset;
        const bool endsAfter = ui32Offset < (candidate.ui32StartOffset + candidate.ui32Size);
        if (startsAtOrBefore && endsAfter)
        {
            *ppsShaderVar = &candidate;
            return 1;
        }
    }
    return 0;
}
// Finds the input signature that lives in register ui32Register and whose component
// mask covers every bit of ui32Mask. Stores it in *ppsOut and returns 1 when found.
// A miss returns 0 and asserts unless allowNull is set.
int ShaderInfo::GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const
{
    for (const auto &signature : psInputSignatures)
    {
        if (ui32Register != signature.ui32Register)
            continue;

        // Reject signatures that lack any of the requested components.
        if ((ui32Mask & (~signature.ui32Mask)) != 0)
            continue;

        *ppsOut = &signature;
        return 1;
    }

    ASSERT(allowNull);
    return 0;
}
// Finds the patch constant signature for register ui32Register.
//
// First pass: a signature on that register whose mask covers every bit of ui32Mask.
// If that fails and allowNull is set, returns 0 without touching *ppsOut.
// Otherwise falls back to the last signature that uses the register at all,
// regardless of mask. Asserts and returns 0 if even that fails.
// Returns 1 and stores the signature in *ppsOut on success.
int ShaderInfo::GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const
{
    const size_t ui32NumVars = psPatchConstantSignatures.size();

    for (size_t i = 0; i < ui32NumVars; ++i)
    {
        if ((ui32Register == psPatchConstantSignatures[i].ui32Register) && (((~psPatchConstantSignatures[i].ui32Mask) & ui32Mask) == 0))
        {
            *ppsOut = &psPatchConstantSignatures[i];
            return 1;
        }
    }

    if (allowNull)
        return 0;

    // There are situations (especially when using dcl_indexrange) where the compiler happily writes outside the actual masks.
    // In those situations just take the last signature that uses that register (it's typically the "highest" one)
    //
    // Count down with "i-- > 0" rather than comparing against 0xffffffff: size_t is
    // 64 bits wide on most targets, so the old sentinel never matched after the
    // wrap-around and the loop ran off the front of the vector (also when it was empty).
    for (size_t i = ui32NumVars; i-- > 0;)
    {
        if (ui32Register == psPatchConstantSignatures[i].ui32Register)
        {
            *ppsOut = &psPatchConstantSignatures[i];
            return 1;
        }
    }

    ASSERT(0);
    return 0;
}
// Finds the output signature on register ui32Register and stream ui32Stream whose
// mask shares at least one component with ui32CompMask (which must be non-zero).
// Stores it in *ppsOut and returns 1 when found.
// A miss returns 0 and asserts unless allowNull is set.
int ShaderInfo::GetOutputSignatureFromRegister(const uint32_t ui32Register,
    const uint32_t ui32CompMask,
    const uint32_t ui32Stream,
    const InOutSignature** ppsOut,
    bool allowNull /* = false */) const
{
    ASSERT(ui32CompMask != 0);

    for (const auto &signature : psOutputSignatures)
    {
        if (ui32Register != signature.ui32Register)
            continue;
        if (!(ui32CompMask & signature.ui32Mask))
            continue;
        if (ui32Stream != signature.ui32Stream)
            continue;

        *ppsOut = &signature;
        return 1;
    }

    ASSERT(allowNull);
    return 0;
}
// Finds the output signature with the given system value type and semantic index.
// Stores it in *ppsOut and returns 1 when found; asserts and returns 0 otherwise.
int ShaderInfo::GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const
{
    for (const auto &signature : psOutputSignatures)
    {
        const bool sameSystemValue = (eSystemValueType == signature.eSystemValueType);
        if (sameSystemValue && ui32SemanticIndex == signature.ui32SemanticIndex)
        {
            *ppsOut = &signature;
            return 1;
        }
    }

    ASSERT(0);
    return 0;
}
static uint32_t GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors)
{
// Struct size is calculated from the offset and size of its last member
if (psType->Class == SVC_STRUCT)
{
return psType->Members.back().Offset + GetCBVarSize(&psType->Members.back(), matrixAsVectors);
}
// Matrices represented as vec4 arrays have special size calculation
if (matrixAsVectors)
{
if (psType->Class == SVC_MATRIX_ROWS)
{
return psType->Rows * 16;
}
else if (psType->Class == SVC_MATRIX_COLUMNS)
{
return psType->Columns * 16;
}
}
// Regular matrices, vectors and scalars
return psType->Columns * psType->Rows * 4;
}
// Checks whether the byte offset offsetToFind falls inside the variable described
// by psType, and if so returns the innermost type that contains it.
//
//  psType       - type to test; its Offset is relative to parentOffset.
//  parentOffset - byte offset of the enclosing variable/struct.
//  offsetToFind - absolute byte offset being looked up.
//  isArray      - out: set to true when the hit is an array of scalars/vectors
//                 or a matrix, false otherwise. Only written on a hit.
//  arrayIndices - optional out: array indices are appended in order from the
//                 outermost parent to the found variable.
//  pi32Rebase   - out: for a plain vector, receives the byte offset of the
//                 vector within its 16-byte slot.
//  flags        - HLSLCC_FLAG_* bits; HLSLCC_FLAG_TRANSLATE_MATRICES changes how
//                 matrix sizes are computed.
//
// Returns NULL when the offset lies outside this type. For a struct hit, returns
// the struct type itself if none of its members claims the offset.
static const ShaderVarType* IsOffsetInType(const ShaderVarType* psType,
    uint32_t parentOffset,
    uint32_t offsetToFind,
    bool* isArray,
    std::vector<uint32_t>* arrayIndices,
    int32_t* pi32Rebase,
    uint32_t flags)
{
    uint32_t thisOffset = parentOffset + psType->Offset;
    uint32_t thisSize = GetCBVarSize(psType, (flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0);

    // Size of one array element, rounded up to the next multiple of 16 bytes
    uint32_t paddedSize = thisSize;
    if (thisSize % 16 > 0)
        paddedSize += (16 - (thisSize % 16));

    uint32_t arraySize = thisSize;

    // Array elements are padded to align on vec4 size, except for the last one
    if (psType->Elements)
        arraySize = (paddedSize * (psType->Elements - 1)) + thisSize;

    if ((offsetToFind >= thisOffset) &&
        offsetToFind < (thisOffset + arraySize))
    {
        *isArray = false;
        if (psType->Class == SVC_STRUCT)
        {
            // Elements are strided by paddedSize (see arraySize above and the modulo
            // below), so the element index must be derived from paddedSize as well.
            // Dividing by the unpadded size gives a wrong index whenever the struct
            // size is not a multiple of 16.
            if (psType->Elements > 1 && arrayIndices != NULL)
                arrayIndices->push_back((offsetToFind - thisOffset) / paddedSize);

            // Need to bring offset back to element zero in case of array of structs
            uint32_t offsetInStruct = (offsetToFind - thisOffset) % paddedSize;
            uint32_t m = 0;

            for (m = 0; m < psType->MemberCount; ++m)
            {
                const ShaderVarType* psMember = &psType->Members[m];

                const ShaderVarType* foundType = IsOffsetInType(psMember, thisOffset, thisOffset + offsetInStruct, isArray, arrayIndices, pi32Rebase, flags);
                if (foundType != NULL)
                    return foundType;
            }
        }
        // Check for array of scalars or vectors (both take up 16 bytes per element).
        // Matrices are also treated as arrays of vectors.
        else if ((psType->Class == SVC_MATRIX_ROWS || psType->Class == SVC_MATRIX_COLUMNS) ||
                 ((psType->Class == SVC_SCALAR || psType->Class == SVC_VECTOR) && psType->Elements > 1))
        {
            *isArray = true;
            if (arrayIndices != NULL)
                arrayIndices->push_back((offsetToFind - thisOffset) / 16);
        }
        else if (psType->Class == SVC_VECTOR)
        {
            //Check for vector starting at a non-vec4 offset.

            // cbuffer $Globals
            // {
            //
            //   float angle;                       // Offset:    0 Size:     4
            //   float2 angle2;                     // Offset:    4 Size:     8
            //
            // }

            //cb0[0].x = angle
            //cb0[0].yzyy = angle2.xyxx

            //Rebase angle2 so that .y maps to .x, .z maps to .y

            pi32Rebase[0] = thisOffset % 16;
        }

        return psType;
    }
    return NULL;
}
// Locates the constant buffer variable addressed by a vec4 register offset plus swizzle.
//
//  ui32Vec4Offset - register index inside the buffer (16 bytes per register).
//  pui32Swizzle   - operand swizzle; only its first entry is used, to pick the
//                   starting component within the register.
//  psCBuf         - constant buffer whose variables are searched.
//  ppsShaderVar   - out: the found variable type.
//  isArray        - out: whether the found variable is an array.
//  arrayIndices   - out (optional): array indices from the root parent to the found variable.
//  pi32Rebase     - out: swizzle rebase.
//  flags          - HLSLCC_FLAG_* bits, forwarded to the size calculation.
//
// Returns 1 when a variable contains the offset, 0 otherwise.
int ShaderInfo::GetShaderVarFromOffset(const uint32_t ui32Vec4Offset,
    const uint32_t(&pui32Swizzle)[4],
    const ConstantBuffer* psCBuf,
    const ShaderVarType** ppsShaderVar,
    bool* isArray,
    std::vector<uint32_t>* arrayIndices,
    int32_t* pi32Rebase,
    uint32_t flags)
{
    uint32_t byteOffset = ui32Vec4Offset * 16;

    // The swizzle can point to another variable than the one at the start of the
    // register, because scalars are packed together into vectors. In the buffer
    // below, cbUIUpdates.g_uMaxFaces is cb1[2].z.
    // cbuffer cbUIUpdates
    // {
    //   float g_fLifeSpan;                 // Offset:    0 Size:     4
    //   float g_fLifeSpanVar;              // Offset:    4 Size:     4 [unused]
    //   float g_fRadiusMin;                // Offset:    8 Size:     4 [unused]
    //   float g_fRadiusMax;                // Offset:   12 Size:     4 [unused]
    //   float g_fGrowTime;                 // Offset:   16 Size:     4 [unused]
    //   float g_fStepSize;                 // Offset:   20 Size:     4
    //   float g_fTurnRate;                 // Offset:   24 Size:     4
    //   float g_fTurnSpeed;                // Offset:   28 Size:     4 [unused]
    //   float g_fLeafRate;                 // Offset:   32 Size:     4
    //   float g_fShrinkTime;               // Offset:   36 Size:     4 [unused]
    //   uint g_uMaxFaces;                  // Offset:   40 Size:     4
    // }
    switch (pui32Swizzle[0])
    {
        case OPERAND_4_COMPONENT_Y:
            byteOffset += 4;
            break;
        case OPERAND_4_COMPONENT_Z:
            byteOffset += 8;
            break;
        case OPERAND_4_COMPONENT_W:
            byteOffset += 12;
            break;
        default:
            // X (or anything else): start of the register
            break;
    }

    for (const auto &var : psCBuf->asVars)
    {
        // The output is overwritten on every iteration, so it ends up NULL after
        // the last variable has been rejected.
        *ppsShaderVar = IsOffsetInType(&var.sType, var.ui32StartOffset, byteOffset, isArray, arrayIndices, pi32Rebase, flags);
        if (*ppsShaderVar != NULL)
            return 1;
    }
    return 0;
}
// Returns a copy of psShaderVar->fullName with an array index written after each '['.
// The n-th '[' receives indices[n]; brackets beyond indices.size() are left without
// an index. No indexing is added for the var itself if it is an array.
std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, std::vector<uint32_t> &indices)
{
    const std::string &name = psShaderVar->fullName;
    std::ostringstream patched;

    size_t copiedUpTo = 0;
    uint32_t bracketNo = 0;
    for (size_t bracket = name.find('[', 0);
         bracket != std::string::npos;
         bracket = name.find('[', copiedUpTo), bracketNo++)
    {
        // Copy everything up to and including the bracket, then the index (if there is one)
        const size_t afterBracket = bracket + 1;
        patched << name.substr(copiedUpTo, afterBracket - copiedUpTo);
        if (bracketNo < indices.size())
            patched << indices[bracketNo];
        copiedUpTo = afterBracket;
    }

    // Remainder after the last bracket (or the whole name when there was none)
    patched << name.substr(copiedUpTo);
    return patched.str();
}
// Maps a resource type to the resource group it is bound through:
// cbuffers and samplers have their own groups, textures and read-only buffers
// share RGROUP_TEXTURE, and all UAV flavours go to RGROUP_UAV.
// RTYPE_TBUFFER asserts and is treated as a texture; any other value asserts and
// yields RGROUP_CBUFFER.
ResourceGroup ShaderInfo::ResourceTypeToResourceGroup(ResourceType eType)
{
    if (eType == RTYPE_CBUFFER)
        return RGROUP_CBUFFER;

    if (eType == RTYPE_SAMPLER)
        return RGROUP_SAMPLER;

    if (eType == RTYPE_TEXTURE ||
        eType == RTYPE_BYTEADDRESS ||
        eType == RTYPE_STRUCTURED)
    {
        return RGROUP_TEXTURE;
    }

    if (eType == RTYPE_UAV_RWTYPED ||
        eType == RTYPE_UAV_RWSTRUCTURED ||
        eType == RTYPE_UAV_RWBYTEADDRESS ||
        eType == RTYPE_UAV_APPEND_STRUCTURED ||
        eType == RTYPE_UAV_CONSUME_STRUCTURED ||
        eType == RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER)
    {
        return RGROUP_UAV;
    }

    if (eType == RTYPE_TBUFFER)
    {
        ASSERT(0); // Need to find out which group this belongs to
        return RGROUP_TEXTURE;
    }

    // Unknown resource type
    ASSERT(0);
    return RGROUP_CBUFFER;
}
// Applies the precisions listed in info to the sampler and texture resource bindings.
// A binding is matched by its exact name first; if that fails and the name starts
// with "sampler", the name without that prefix is tried (DX11 style sampler case).
// Bindings with no match keep their current precision.
void ShaderInfo::AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info)
{
    if (info.empty())
        return;

    for (auto &binding : psResourceBindings)
    {
        const bool isSamplerOrTexture = (binding.eType == RTYPE_SAMPLER || binding.eType == RTYPE_TEXTURE);
        if (!isSamplerOrTexture)
            continue;

        HLSLccSamplerPrecisionInfo::iterator match = info.find(binding.name);
        if (match == info.end() && binding.name.compare(0, 7, "sampler") == 0)
        {
            // Retry with the 7-character "sampler" prefix stripped
            match = info.find(binding.name.substr(7));
        }

        if (match != info.end())
            binding.ePrecision = match->second;
    }
}

887
src/UseDefineChains.cpp Normal file
View File

@ -0,0 +1,887 @@
#include "internal_includes/UseDefineChains.h"
#include "internal_includes/debug.h"
#include "internal_includes/Instruction.h"
#include "internal_includes/ControlFlowGraph.h"
#include "internal_includes/debug.h"
#include "internal_includes/HLSLccToolkit.h"
#include <algorithm>
using HLSLcc::ForEachOperand;
#define DEBUG_UDCHAINS 0
#if DEBUG_UDCHAINS
// Debug mode
static void UDCheckConsistencyDUChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions)
{
DefineUseChain::iterator du = psDUChains[idx].begin();
UseDefineChain::iterator ud = psUDChains[idx].begin();
while (du != psDUChains[idx].end())
{
ASSERT(du->index == idx % 4);
// Check that the definition actually writes to idx
{
uint32_t tempReg = idx / 4;
uint32_t offs = idx - (tempReg * 4);
uint32_t accessMask = 1 << offs;
uint32_t i;
int found = 0;
for (i = 0; i < du->psInst->ui32FirstSrc; i++)
{
if (du->psInst->asOperands[i].eType == OPERAND_TYPE_TEMP)
{
if (du->psInst->asOperands[i].ui32RegisterNumber == tempReg)
{
uint32_t writeMask = GetOperandWriteMask(&du->psInst->asOperands[i]);
if (writeMask & accessMask)
{
ASSERT(writeMask == du->writeMask);
found = 1;
break;
}
}
}
}
ASSERT(found);
}
// Check that each usage of each definition also is found in the use-define chain
UsageSet::iterator ul = du->usages.begin();
while (ul != du->usages.end())
{
// Search for the usage in the chain
UseDefineChain::iterator use = ud;
while (use != psUDChains[idx].end() && &*use != *ul)
use++;
ASSERT(use != psUDChains[idx].end());
ASSERT(&*use == *ul);
// Check that the mapping back is also found
ASSERT(std::find(use->defines.begin(), use->defines.end(), &*du) != use->defines.end());
ul++;
}
du++;
}
}
static void UDCheckConsistencyUDChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions)
{
DefineUseChain::iterator du = psDUChains[idx].begin();
UseDefineChain::iterator ud = psUDChains[idx].begin();
while (ud != psUDChains[idx].end())
{
// Check that each definition of each usage also is found in the define-use chain
DefineSet::iterator dl = ud->defines.begin();
ASSERT(ud->psOp->ui32RegisterNumber == idx / 4);
ASSERT(ud->index == idx % 4);
while (dl != ud->defines.end())
{
// Search for the definition in the chain
DefineUseChain::iterator def = du;
while (def != psDUChains[idx].end() && &*def != *dl)
def++;
ASSERT(def != psDUChains[idx].end());
ASSERT(&*def == *dl);
// Check that the mapping back is also found
ASSERT(std::find(def->usages.begin(), def->usages.end(), &*ud) != def->usages.end());
dl++;
}
ud++;
}
}
// Debug-only: runs both chain consistency checks on every component
// (4 per register) of the first tempRegs temp registers.
static void UDCheckConsistency(uint32_t tempRegs, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions)
{
    const uint32_t componentCount = tempRegs * 4;
    for (uint32_t component = 0; component < componentCount; component++)
    {
        UDCheckConsistencyDUChain(component, psDUChains, psUDChains, activeDefinitions);
        UDCheckConsistencyUDChain(component, psDUChains, psUDChains, activeDefinitions);
    }
}
#define printf_console printf
#endif
using namespace HLSLcc::ControlFlow;
using std::for_each;
// Returns the chain entry for the definition def (identified by its instruction and
// operand). If the chain has no such entry yet, a new one is pushed to the front and
// initialised with the given component index and the operand's access mask.
static DefineUseChainEntry *GetOrCreateDefinition(const BasicBlock::Definition &def, DefineUseChain &psDUChain, uint32_t index)
{
    // Reuse an existing entry when there is one
    for (DefineUseChain::iterator existing = psDUChain.begin(); existing != psDUChain.end(); ++existing)
    {
        if (existing->psInst == def.m_Instruction && existing->psOp == def.m_Operand)
            return &(*existing);
    }

    // None yet: create it at the head of the chain
    psDUChain.push_front(DefineUseChainEntry());
    DefineUseChainEntry *created = &psDUChain.front();
    created->psInst = (Instruction *)def.m_Instruction;
    created->psOp = (Operand *)def.m_Operand;
    created->index = index;
    created->writeMask = def.m_Operand->GetAccessMask();
    // An entry is its own sibling for the component it describes
    created->psSiblings[index] = created;

    return created;
}
// Do flow control analysis on the instructions and build the define-use and use-define chains
void BuildUseDefineChains(std::vector<Instruction> &instructions, uint32_t ui32NumTemps, DefineUseChains &psDUChain, UseDefineChains &psUDChain, HLSLcc::ControlFlow::ControlFlowGraph &cfg)
{
Instruction *psFirstInstruction = &instructions[0];
Instruction *psLastInstruction = &instructions[instructions.size() - 1];
ActiveDefinitions lastSeenDefinitions(ui32NumTemps * 4, NULL); // Array of pointers to the currently active definition for each temp
psDUChain.clear();
psUDChain.clear();
for (uint32_t i = 0; i < ui32NumTemps * 4; i++)
{
psUDChain.insert(std::make_pair(i, UseDefineChain()));
psDUChain.insert(std::make_pair(i, DefineUseChain()));
}
const ControlFlowGraph::BasicBlockStorage &blocks = cfg.AllBlocks();
// Loop through each block, first calculate the union of all the reachables of all preceding blocks
// and then build on that as we go along the basic block instructions
for_each(blocks.begin(), blocks.end(), [&](const HLSLcc::shared_ptr<BasicBlock> &bptr)
{
const BasicBlock &b = *bptr.get();
BasicBlock::ReachableVariables rvars;
for_each(b.Preceding().begin(), b.Preceding().end(), [&](const Instruction *precBlock)
{
const BasicBlock &b = *cfg.GetBasicBlockForInstruction(precBlock);
BasicBlock::RVarUnion(rvars, b.Reachable());
});
// Now we have a Reachable set for the beginning of this block in rvars. Loop through all instructions and their operands and pick up uses and definitions
for (const Instruction *inst = b.First(); inst <= b.Last(); inst++)
{
// Process sources first
ForEachOperand(inst, inst+1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND,
[&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
{
if (psOperand->eType != OPERAND_TYPE_TEMP)
return;
uint32_t tempReg = psOperand->ui32RegisterNumber;
uint32_t accessMask = psOperand->GetAccessMask();
// Go through each component
for (int k = 0; k < 4; k++)
{
if (!(accessMask & (1 << k)))
continue;
uint32_t regIdx = tempReg * 4 + k;
// Add an use for all visible definitions
psUDChain[regIdx].push_front(UseDefineChainEntry());
UseDefineChainEntry &ue = *psUDChain[regIdx].begin();
ue.psInst = (Instruction *)psInst;
ue.psOp = (Operand *)psOperand;
ue.accessMask = accessMask;
ue.index = k;
ue.psSiblings[k] = &ue;
// ue.siblings will be filled out later.
BasicBlock::ReachableDefinitionsPerVariable& rpv = rvars[regIdx];
for_each(rpv.begin(), rpv.end(), [&](const BasicBlock::Definition &def)
{
DefineUseChainEntry *duentry = GetOrCreateDefinition(def, psDUChain[regIdx], k);
ue.defines.insert(duentry);
duentry->usages.insert(&ue);
});
}
return;
});
// Then the destination operands
ForEachOperand(inst, inst+1, FEO_FLAG_DEST_OPERAND,
[&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
{
if (psOperand->eType != OPERAND_TYPE_TEMP)
return;
uint32_t tempReg = psOperand->ui32RegisterNumber;
uint32_t accessMask = psOperand->GetAccessMask();
// Go through each component
for (int k = 0; k < 4; k++)
{
if (!(accessMask & (1 << k)))
continue;
uint32_t regIdx = tempReg * 4 + k;
// Overwrite whatever's in rvars; they are killed by this
rvars[regIdx].clear();
rvars[regIdx].insert(BasicBlock::Definition(psInst, psOperand));
// Make sure the definition gets created even though it doesn't have any uses at all
// (happens when sampling a texture but not all channels are used etc).
GetOrCreateDefinition(BasicBlock::Definition(psInst, psOperand), psDUChain[regIdx], k);
}
return;
});
}
});
// Connect the siblings for all uses and definitions
for_each(psUDChain.begin(), psUDChain.end(), [&](std::pair<const uint32_t, UseDefineChain> &udpair)
{
UseDefineChain &ud = udpair.second;
// Clear out the bottom 2 bits to get the actual base reg
uint32_t baseReg = udpair.first & ~(3);
for_each(ud.begin(), ud.end(), [&](UseDefineChainEntry &ue)
{
ASSERT(baseReg / 4 == ue.psOp->ui32RegisterNumber);
// Go through each component
for (int k = 0; k < 4; k++)
{
// Skip components that we don't access, or the one that's our own
if (!(ue.accessMask & (1 << k)) || ue.index == k)
continue;
// Find the corresponding sibling. We can uniquely identify it by the operand pointer alone.
UseDefineChain::iterator siblItr = std::find_if(psUDChain[baseReg + k].begin(), psUDChain[baseReg + k].end(), [&](const UseDefineChainEntry &_sibl) -> bool { return _sibl.psOp == ue.psOp; });
ASSERT(siblItr != psUDChain[baseReg + k].end());
UseDefineChainEntry &sibling = *siblItr;
ue.psSiblings[k] = &sibling;
}
});
});
// Same for definitions
for_each(psDUChain.begin(), psDUChain.end(), [&](std::pair<const uint32_t, DefineUseChain> &dupair)
{
DefineUseChain &du = dupair.second;
// Clear out the bottom 2 bits to get the actual base reg
uint32_t baseReg = dupair.first & ~(3);
for_each(du.begin(), du.end(), [&](DefineUseChainEntry &de)
{
ASSERT(baseReg / 4 == de.psOp->ui32RegisterNumber);
// Go through each component
for (int k = 0; k < 4; k++)
{
// Skip components that we don't access, or the one that's our own
if (!(de.writeMask & (1 << k)) || de.index == k)
continue;
// Find the corresponding sibling. We can uniquely identify it by the operand pointer alone.
DefineUseChain::iterator siblItr = std::find_if(psDUChain[baseReg + k].begin(), psDUChain[baseReg + k].end(), [&](const DefineUseChainEntry &_sibl) -> bool { return _sibl.psOp == de.psOp; });
ASSERT(siblItr != psDUChain[baseReg + k].end());
DefineUseChainEntry &sibling = *siblItr;
de.psSiblings[k] = &sibling;
}
});
});
#if DEBUG_UDCHAINS
UDCheckConsistency(ui32NumTemps, psDUChain, psUDChain, lastSeenDefinitions);
#endif
}
typedef std::vector<DefineUseChainEntry *> SplitDefinitions;

// Split out a set of defines (and all their usages) to use a new temp register.
//
//  defs            - the definitions to move; must be non-empty and all on the same register.
//  psNumTemps      - in/out: current temp count. Its value becomes the new register
//                    number and it is incremented by one.
//  psDUChains      - define-use chains; entries of the moved definitions are spliced
//                    over to the chains of the new register.
//  psUDChains      - use-define chains; entries of the affected usages are moved likewise.
//  pui32SplitTable - per-register split info. The new register's slot receives
//                    (count << 24) | (rebase << 16) | original temp, where rebase is
//                    the lowest written component and count spans up to the highest one.
//
// Components are rebased so that the lowest written component of the old register
// becomes component 0 of the new one.
static void UDDoSplit(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable)
{
    // Validate before defs[0] is touched below
    ASSERT(defs.size() > 0);

    uint32_t newReg = *psNumTemps;
    uint32_t oldReg = defs[0]->psOp->ui32RegisterNumber;
    uint32_t accessMask = defs[0]->writeMask;
    uint32_t i, u32def;
    uint32_t rebase, count;
    uint32_t splitTableValue;

    // Combine the write masks of all definitions being moved
    for (i = 1; i < defs.size(); i++)
    {
        ASSERT(defs[i]->psOp->ui32RegisterNumber == oldReg);
        accessMask |= defs[i]->writeMask;
    }

    (*psNumTemps)++;

#if DEBUG_UDCHAINS
    UDCheckConsistency((*psNumTemps) - 1, psDUChains, psUDChains, ActiveDefinitions());
#endif

    ASSERT(accessMask != 0 && accessMask <= 0xf);
    // Calculate rebase value and component count
    rebase = 0;
    count = 0;
    i = accessMask;
    while ((i & 1) == 0)
    {
        rebase++;
        i = i >> 1;
    }
    while (i != 0)
    {
        count++;
        i = i >> 1;
    }

    // Make sure there's enough room in the split table.
    // Doubling alone is not enough when the table is empty or newReg is far beyond
    // its end, so always grow to at least newReg + 1 entries.
    if (pui32SplitTable.size() <= newReg)
    {
        size_t newSize = std::max<size_t>(pui32SplitTable.size() * 2, (size_t)newReg + 1);
        pui32SplitTable.resize(newSize, 0xffffffff);
    }

    // Set the original temp of the new register
    {
        // Follow the table back until a register with no split info (0xffffffff) is reached
        uint32_t origTemp = oldReg;
        while (pui32SplitTable[origTemp] != 0xffffffff)
            origTemp = pui32SplitTable[origTemp] & 0xffff;

        ASSERT(rebase < 4);
        ASSERT(count <= 4);
        splitTableValue = (count << 24) | (rebase << 16) | origTemp;

        pui32SplitTable[newReg] = splitTableValue;
    }

    // Insert the new temps to the map
    for (i = newReg * 4; i < newReg * 4 + 4; i++)
    {
        psUDChains.insert(std::make_pair(i, UseDefineChain()));
        psDUChains.insert(std::make_pair(i, DefineUseChain()));
    }

    for (u32def = 0; u32def < defs.size(); u32def++)
    {
        DefineUseChainEntry *defineToSplit = defs[u32def];
        uint32_t oldIdx = defineToSplit->index;
#if DEBUG_UDCHAINS
        printf("Split def at instruction %d (reg %d -> %d, access %X, rebase %d, count: %d)\n", (int)defineToSplit->psInst->id, oldReg, newReg, accessMask, rebase, count);
#endif

        // We may have moved the opcodes already because of multiple defines pointing to the same op
        if (defineToSplit->psOp->ui32RegisterNumber != newReg)
        {
            ASSERT(defineToSplit->psOp->ui32RegisterNumber == oldReg);
            // Update the declaration operand
            // Don't change possible suboperands as they are sources
            defineToSplit->psInst->ChangeOperandTempRegister(defineToSplit->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase);
        }

        defineToSplit->writeMask >>= rebase;
        defineToSplit->index -= rebase;
        // Change the temp register number for all usages
        UsageSet::iterator ul = defineToSplit->usages.begin();
        while (ul != defineToSplit->usages.end())
        {
            // Already updated by one of the siblings? Skip.
            if ((*ul)->psOp->ui32RegisterNumber != newReg)
            {
                ASSERT((*ul)->psOp->ui32RegisterNumber == oldReg);
                (*ul)->psInst->ChangeOperandTempRegister((*ul)->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase);
            }

            // Update the UD chain
            {
                UseDefineChain::iterator udLoc = psUDChains[oldReg * 4 + oldIdx].begin();
                while (udLoc != psUDChains[oldReg * 4 + oldIdx].end())
                {
                    if (&*udLoc == *ul)
                    {
                        // Move to new list
                        psUDChains[newReg * 4 + oldIdx - rebase].splice(psUDChains[newReg * 4 + oldIdx - rebase].begin(), psUDChains[oldReg * 4 + oldIdx], udLoc);

                        if (rebase > 0)
                        {
                            (*ul)->accessMask >>= rebase;
                            (*ul)->index -= rebase;
                            memmove((*ul)->psSiblings, (*ul)->psSiblings + rebase, (4 - rebase) * sizeof(UseDefineChain *));
                        }
                        break;
                    }
                    udLoc++;
                }
            }

            ul++;
        }

        // Move the define out of the old chain (if its still there)
        {
            // Find the define in the old chain
            DefineUseChain::iterator duLoc = psDUChains[oldReg * 4 + oldIdx].begin();
            while (duLoc != psDUChains[oldReg * 4 + oldIdx].end() && ((&*duLoc) != defineToSplit))
            {
                duLoc++;
            }
            ASSERT(duLoc != psDUChains[oldReg * 4 + oldIdx].end());
            {
                // Move directly to new chain
                psDUChains[newReg * 4 + oldIdx - rebase].splice(psDUChains[newReg * 4 + oldIdx - rebase].begin(), psDUChains[oldReg * 4 + oldIdx], duLoc);
                if (rebase != 0)
                {
                    memmove(defineToSplit->psSiblings, defineToSplit->psSiblings + rebase, (4 - rebase) * sizeof(DefineUseChain *));
                }
            }
        }
    }

#if DEBUG_UDCHAINS
    UDCheckConsistency(*psNumTemps, psDUChains, psUDChains, ActiveDefinitions());
#endif
}
// Appends every non-null entry of newDef->psSiblings to defs, skipping entries that
// are already in the list. (A define normally appears in its own sibling table, so
// this adds the define itself as well.)
static void AddDefineToList(SplitDefinitions &defs, DefineUseChainEntry *newDef)
{
    for (uint32_t component = 0; component < 4; component++)
    {
        DefineUseChainEntry *candidate = newDef->psSiblings[component];
        if (!candidate)
            continue;

        const bool alreadyListed = std::find(defs.begin(), defs.end(), candidate) != defs.end();
        if (!alreadyListed)
            defs.push_back(candidate);
    }
}
// Check if a set of definitions can be split and does the split. Returns nonzero if a split took place
//
// defs is the candidate set; it may be extended in place. Whenever a usage is found that
// depends on a definition outside of defs, that definition (with its siblings) is added
// and the function calls itself again with the enlarged set.
// psNumTemps, psDUChains, psUDChains and pui32SplitTable are passed on to UDDoSplit
// when the split is carried out.
static int AttemptSplitDefinitions(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable)
{
uint32_t reg;
uint32_t combinedMask;
uint32_t i, k, u32def;
int canSplit = 1;
DefineUseChain::iterator du;
int hasLeftoverDefinitions = 0;
// Initial checks: all definitions must:
// Access the same register
// Have at least one definition in any of the 4 register slots that isn't included
if (defs.empty())
return 0;
reg = defs[0]->psOp->ui32RegisterNumber;
combinedMask = defs[0]->writeMask;
for (i = 1; i < defs.size(); i++)
{
if (reg != defs[i]->psOp->ui32RegisterNumber)
return 0;
combinedMask |= defs[i]->writeMask;
}
// Look through the chains of all 4 components of the register for a definition that is not part of defs
for (i = 0; i < 4; i++)
{
du = psDUChains[reg * 4 + i].begin();
while (du != psDUChains[reg * 4 + i].end())
{
int defFound = 0;
for (k = 0; k < defs.size(); k++)
{
if (&*du == defs[k])
{
defFound = 1;
break;
}
}
if (defFound == 0)
{
hasLeftoverDefinitions = 1;
break;
}
du++;
}
if (hasLeftoverDefinitions)
break;
}
// We'd be splitting the entire register and all its definitions, no point in that.
if (hasLeftoverDefinitions == 0)
return 0;
// Check all the definitions. Any of them must not have any usages that see any definitions not in our defs array.
for (u32def = 0; u32def < defs.size(); u32def++)
{
DefineUseChainEntry *def = defs[u32def];
UsageSet::iterator ul = def->usages.begin();
while (ul != def->usages.end())
{
uint32_t j;
// Check that we only read a subset of the combined writemask
if (((*ul)->accessMask & (~combinedMask)) != 0)
{
// Do an additional attempt, pick up all the sibling definitions as well
// Only do this if we have the space in the definitions table
for (j = 0; j < 4; j++)
{
if (((*ul)->accessMask & (1 << j)) == 0)
continue;
// NOTE(review): psSiblings[j] and its defines set are dereferenced here without a check;
// the non-null / non-empty ASSERTs only happen on the other path further down.
// Only the first definition of each sibling is added here.
AddDefineToList(defs, *(*ul)->psSiblings[j]->defines.begin());
}
// Start over with the enlarged set
return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable);
}
// It must have at least one declaration
ASSERT(!(*ul)->defines.empty());
// Check that all siblings for the usage use one of the definitions
for (j = 0; j < 4; j++)
{
uint32_t m;
int defineFound = 0;
if (((*ul)->accessMask & (1 << j)) == 0)
continue;
ASSERT((*ul)->psSiblings[j] != NULL);
ASSERT(!(*ul)->psSiblings[j]->defines.empty());
// Check that all definitions for this usage are found from the definitions table
DefineSet::iterator dl = (*ul)->psSiblings[j]->defines.begin();
while (dl != (*ul)->psSiblings[j]->defines.end())
{
defineFound = 0;
for (m = 0; m < defs.size(); m++)
{
if (*dl == defs[m])
{
defineFound = 1;
break;
}
}
if (defineFound == 0)
{
// Add this define and all its siblings to the table and try again
AddDefineToList(defs, *dl);
return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable);
// NOTE(review): the two statements below are unreachable because of the return above.
canSplit = 0;
break;
}
dl++;
}
// NOTE(review): since a missing define always returns above, defineFound is never 0 here
// as long as the defines set is non-empty (which is only ASSERTed).
if (defineFound == 0)
{
canSplit = 0;
break;
}
}
if (canSplit == 0)
break;
// This'll do, check next usage
ul++;
}
if (canSplit == 0)
break;
}
// Every usage of every definition only depends on definitions inside defs: do the split
if (canSplit)
{
UDDoSplit(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable);
return 1;
}
return 0;
}
// Do temp splitting based on use-define chains
void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable)
{
// Algorithm overview:
// Take each definition and look at all its usages. If all usages only see this definition (and this is not the only definition for this variable),
// split it out.
uint32_t i;
uint32_t tempsAtStart = *psNumTemps; // We don't need to try to analyze the newly created ones, they're unsplittable by definition
for (i = 0; i < tempsAtStart * 4; i++)
{
// No definitions?
if (psDUChains[i].empty())
continue;
DefineUseChain::iterator du = psDUChains[i].begin();
// Ok we have multiple definitions for a temp, check them through
while (du != psDUChains[i].end())
{
SplitDefinitions sd;
AddDefineToList(sd, &*du);
du++;
// If we split, we'll have to start from the beginning of this chain because du might no longer be in this chain
if (AttemptSplitDefinitions(sd, psNumTemps, psDUChains, psUDChains, pui32SplitTable))
{
du = psDUChains[i].begin();
}
}
}
}
// Decides whether the definition `du` may be downgraded to partial precision.
//
// Returns 1 (and stores OPERAND_MIN_PRECISION_FLOAT_16 into *pType when pType is non-null)
// only when all of the following hold:
//   - the defined operand still has the default (full) precision,
//   - the defining instruction is one of the arithmetic opcodes listed below
//     (sampler ops, bitwise ops and comparisons are not in the list),
//   - every source operand that is not a 32-bit immediate already has a non-default
//     minimum precision.
// Returns 0 otherwise and leaves *pType untouched.
static int CanDowngradeDefinitionPrecision(DefineUseChain::iterator du, OPERAND_MIN_PRECISION *pType)
{
    Instruction *inst = du->psInst;

    // A destination that already carries an explicit precision is left alone.
    if (du->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT)
        return 0;

    // Only these opcodes are eligible.
    switch (inst->eOpcode)
    {
        case OPCODE_MOV:
        case OPCODE_ADD:
        case OPCODE_MUL:
        case OPCODE_MAD:
        case OPCODE_DIV:
        case OPCODE_MIN:
        case OPCODE_MAX:
        case OPCODE_LOG:
        case OPCODE_EXP:
        case OPCODE_RSQ:
        case OPCODE_SQRT:
        case OPCODE_DP2:
        case OPCODE_DP2ADD:
        case OPCODE_DP3:
        case OPCODE_DP4:
            break;
        default:
            return 0;
    }

    // A single full-precision source operand blocks the downgrade. Immediates are ignored.
    for (uint32_t srcIdx = inst->ui32FirstSrc; srcIdx < inst->ui32NumOperands; ++srcIdx)
    {
        const Operand &src = inst->asOperands[srcIdx];
        if (src.eType != OPERAND_TYPE_IMMEDIATE32 && src.eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT)
            return 0;
    }

    // Don't go lower than mediump.
    if (pType)
        *pType = OPERAND_MIN_PRECISION_FLOAT_16;

    return 1;
}
// Returns true when every usage of the definition `du` is an operand that still has the
// default minimum precision AND belongs to one of the floating point arithmetic opcodes
// listed below. A definition without any usages yields true.
static bool HasOnlyFloatUsages(DefineUseChain::iterator du)
{
    for (const auto &usage : du->usages)
    {
        // A usage operand that already has an explicit precision disqualifies the definition.
        if (usage->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT)
            return false;

        switch (usage->psInst->eOpcode)
        {
            case OPCODE_MOV:
            case OPCODE_ADD:
            case OPCODE_MUL:
            case OPCODE_MAD:
            case OPCODE_DIV:
            case OPCODE_MIN:
            case OPCODE_MAX:
            case OPCODE_LOG:
            case OPCODE_EXP:
            case OPCODE_RSQ:
            case OPCODE_SQRT:
            case OPCODE_DP2:
            case OPCODE_DP2ADD:
            case OPCODE_DP3:
            case OPCODE_DP4:
                break; // acceptable consumer, keep checking the rest
            default:
                return false;
        }
    }
    return true;
}
// Based on the sampler precisions, downgrade the definitions if possible.
void UpdateSamplerPrecisions(const ShaderInfo &info, DefineUseChains &psDUChains, uint32_t ui32NumTemps)
{
uint32_t madeProgress = 0;
do
{
uint32_t i;
madeProgress = 0;
for (i = 0; i < ui32NumTemps * 4; i++)
{
DefineUseChain::iterator du = psDUChains[i].begin();
while (du != psDUChains[i].end())
{
OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT;
if ((du->psInst->IsPartialPrecisionSamplerInstruction(info, &sType)
|| CanDowngradeDefinitionPrecision(du, &sType))
&& du->psInst->asOperands[0].eType == OPERAND_TYPE_TEMP
&& du->psInst->asOperands[0].eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT
&& du->isStandalone
&& HasOnlyFloatUsages(du))
{
uint32_t sibl;
// Ok we can change the precision.
ASSERT(du->psOp->eType == OPERAND_TYPE_TEMP);
ASSERT(sType != OPERAND_MIN_PRECISION_DEFAULT);
du->psOp->eMinPrecision = sType;
// Update all the uses of all the siblings
for (sibl = 0; sibl < 4; sibl++)
{
if (!du->psSiblings[sibl])
continue;
UsageSet::iterator ul = du->psSiblings[sibl]->usages.begin();
while (ul != du->psSiblings[sibl]->usages.end())
{
ASSERT((*ul)->psOp->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT ||
(*ul)->psOp->eMinPrecision == sType);
// We may well write this multiple times to the same op but that's fine.
(*ul)->psOp->eMinPrecision = sType;
ul++;
}
}
madeProgress = 1;
}
du++;
}
}
} while (madeProgress != 0);
}
// Marks "standalone" definitions in the define-use chains.
//
// A definition is treated as standalone when, for every usage of every one of its siblings,
// each sibling of that usage sees exactly one definition, and that definition is the
// corresponding sibling of the definition under test. When that holds, isStandalone is set
// on all existing siblings of the definition. Definitions already marked are skipped.
void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32NumTemps)
{
    const uint32_t componentCount = ui32NumTemps * 4;

    for (uint32_t component = 0; component < componentCount; ++component)
    {
        for (DefineUseChain::iterator def = psDUChains[component].begin();
             def != psDUChains[component].end();
             ++def)
        {
            if (def->isStandalone)
                continue;

            bool standalone = true;

            // Both loops stop as soon as a single violating usage has been found.
            for (uint32_t siblingIdx = 0; standalone && siblingIdx < 4; ++siblingIdx)
            {
                if (!def->psSiblings[siblingIdx])
                    continue;

                for (UsageSet::iterator use = def->psSiblings[siblingIdx]->usages.begin();
                     standalone && use != def->psSiblings[siblingIdx]->usages.end();
                     ++use)
                {
                    ASSERT(!(*use)->defines.empty());

                    // All the siblings of this usage must only see this definition's
                    // corresponding sibling.
                    for (uint32_t k = 0; k < 4; ++k)
                    {
                        if (!(*use)->psSiblings[k])
                            continue;

                        if ((*use)->psSiblings[k]->defines.size() > 1
                            || *(*use)->psSiblings[k]->defines.begin() != def->psSiblings[k])
                        {
                            standalone = false;
                            break;
                        }
                    }
                }
            }

            if (!standalone)
                continue;

            // Yep, mark the definition together with all of its siblings.
            for (uint32_t siblingIdx = 0; siblingIdx < 4; ++siblingIdx)
            {
                if (def->psSiblings[siblingIdx])
                    def->psSiblings[siblingIdx]->isStandalone = 1;
            }
        }
    }
}
// Copies the analysis results from the define-use chains back into the Instruction and
// Operand member lists: for every (definition, usage) pair the usage is appended to the
// defining instruction's m_Uses, and the definition is appended to the using operand's
// m_Defines.
void WriteBackUsesAndDefines(DefineUseChains &psDUChains)
{
    for (const DefineUseChains::value_type &chainEntry : psDUChains)
    {
        const DefineUseChain &definitions = chainEntry.second;

        for (const DefineUseChain::value_type &definition : definitions)
        {
            for (const UseDefineChainEntry *usage : definition.usages)
            {
                // The defining instruction learns about this use ...
                definition.psInst->m_Uses.push_back(Instruction::Use(usage->psInst, usage->psOp));
                // ... and the using operand learns about its definition.
                usage->psOp->m_Defines.push_back(Operand::Define(definition.psInst, definition.psOp));
            }
        }
    }
}

85
src/cbstring/bsafe.c Normal file
View File

@ -0,0 +1,85 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
* BSD open source license or GPL v2.0. Refer to the accompanying documentation
* for details on usage and license.
*/
/*
* bsafe.c
*
* This is an optional module that can be used to help enforce a safety
* standard based on pervasive usage of bstrlib. This file is not necessarily
* portable, however, it has been tested to work correctly with Intel's C/C++
* compiler, WATCOM C/C++ v11.x and Microsoft Visual C++.
*/
#include <stdio.h>
#include <stdlib.h>
#include "bsafe.h"
/* When non-zero, each trap function below calls exit (-1) after printing its
 * diagnostic. Nothing in this file changes it from its initial value of 1. */
static int bsafeShouldExit = 1;
/* NOTE: everything from here down to the final #endif of this file is
 * compiled out by this "#if 0", so none of the trap functions are built and
 * bsafeShouldExit is the only thing this file currently defines. */
#if 0
/* Each function below replaces a C library string function of the same name.
 * Instead of doing the library function's work it prints a "bsafe error"
 * message to stderr naming the bstrlib alternative, exits when
 * bsafeShouldExit is set, and otherwise returns NULL.
 * The "x = x;" self-assignments have no effect; presumably they are there to
 * silence unused-parameter warnings. */
char * strcpy (char *dst, const char *src);
char * strcat (char *dst, const char *src);
/* Trap for strcpy(); the message points to bstrcpy. */
char * strcpy (char *dst, const char *src) {
dst = dst;
src = src;
fprintf (stderr, "bsafe error: strcpy() is not safe, use bstrcpy instead.\n");
if (bsafeShouldExit) exit (-1);
return NULL;
}
/* Trap for strcat(); the message points to bstrcat. */
char * strcat (char *dst, const char *src) {
dst = dst;
src = src;
fprintf (stderr, "bsafe error: strcat() is not safe, use bstrcat instead.\n");
if (bsafeShouldExit) exit (-1);
return NULL;
}
/* The gets() trap is only defined for compilers other than GCC and newer
 * MSVC (_MSC_VER > 1310). */
#if !defined (__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310))
/* Trap for gets(); the message points to bgets. The parentheses around the
 * name keep a function-like macro called gets from being expanded here. */
char * (gets) (char * buf) {
buf = buf;
fprintf (stderr, "bsafe error: gets() is not safe, use bgets.\n");
if (bsafeShouldExit) exit (-1);
return NULL;
}
#endif
/* Trap for strncpy(); the message points to bmidstr. */
char * (strncpy) (char *dst, const char *src, size_t n) {
dst = dst;
src = src;
n = n;
fprintf (stderr, "bsafe error: strncpy() is not safe, use bmidstr instead.\n");
if (bsafeShouldExit) exit (-1);
return NULL;
}
/* Trap for strncat(); the message points to bstrcat/btrunc/cstr2tbstr. */
char * (strncat) (char *dst, const char *src, size_t n) {
dst = dst;
src = src;
n = n;
fprintf (stderr, "bsafe error: strncat() is not safe, use bstrcat then btrunc\n\tor cstr2tbstr, btrunc then bstrcat instead.\n");
if (bsafeShouldExit) exit (-1);
return NULL;
}
/* Trap for strtok(); the message points to bsplit / bsplits. */
char * (strtok) (char *s1, const char *s2) {
s1 = s1;
s2 = s2;
fprintf (stderr, "bsafe error: strtok() is not safe, use bsplit or bsplits instead.\n");
if (bsafeShouldExit) exit (-1);
return NULL;
}
/* Trap for strdup(); the message points to bstrcpy. */
char * (strdup) (const char *s) {
s = s;
fprintf (stderr, "bsafe error: strdup() is not safe, use bstrcpy.\n");
if (bsafeShouldExit) exit (-1);
return NULL;
}
#endif

43
src/cbstring/bsafe.h Normal file
View File

@ -0,0 +1,43 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
* BSD open source license or GPL v2.0. Refer to the accompanying documentation
* for details on usage and license.
*/
/*
* bsafe.h
*
* This is an optional module that can be used to help enforce a safety
* standard based on pervasive usage of bstrlib. This file is not necessarily
* portable, however, it has been tested to work correctly with Intel's C/C++
* compiler, WATCOM C/C++ v11.x and Microsoft Visual C++.
*/
#ifndef BSTRLIB_BSAFE_INCLUDE
#define BSTRLIB_BSAFE_INCLUDE
#ifdef __cplusplus
extern "C" {
#endif
#if !defined (__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310))
/* This is caught in the linker, so it's not necessary for gcc. */
extern char * (gets) (char * buf);
#endif
extern char * (strncpy) (char *dst, const char *src, size_t n);
extern char * (strncat) (char *dst, const char *src, size_t n);
extern char * (strtok) (char *s1, const char *s2);
extern char * (strdup) (const char *s);
#undef strcpy
#undef strcat
#define strcpy(a,b) bsafe_strcpy(a,b)
#define strcat(a,b) bsafe_strcat(a,b)
#ifdef __cplusplus
}
#endif
#endif

1133
src/cbstring/bstraux.c Normal file

File diff suppressed because it is too large Load Diff

112
src/cbstring/bstraux.h Normal file
View File

@ -0,0 +1,112 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
* BSD open source license or GPL v2.0. Refer to the accompanying documentation
* for details on usage and license.
*/
/*
* bstraux.h
*
* This file is not a necessary part of the core bstring library itself, but
* is just an auxiliary module which includes miscellaneous or trivial
* functions.
*/
#ifndef BSTRAUX_INCLUDE
#define BSTRAUX_INCLUDE
#include <time.h>
#include "bstrlib.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Safety mechanisms */
#define bstrDeclare(b) bstring (b) = NULL;
#define bstrFree(b) {if ((b) != NULL && (b)->slen >= 0 && (b)->mlen >= (b)->slen) { bdestroy (b); (b) = NULL; }}
/* Backward compatibility with previous versions of Bstrlib */
#define bAssign(a,b) ((bassign)((a), (b)))
#define bSubs(b,pos,len,a,c) ((breplace)((b),(pos),(len),(a),(unsigned char)(c)))
#define bStrchr(b,c) ((bstrchr)((b), (c)))
#define bStrchrFast(b,c) ((bstrchr)((b), (c)))
#define bCatCstr(b,s) ((bcatcstr)((b), (s)))
#define bCatBlk(b,s,len) ((bcatblk)((b),(s),(len)))
#define bCatStatic(b,s) bCatBlk ((b), ("" s ""), sizeof (s) - 1)
#define bTrunc(b,n) ((btrunc)((b), (n)))
#define bReplaceAll(b,find,repl,pos) ((bfindreplace)((b),(find),(repl),(pos)))
#define bUppercase(b) ((btoupper)(b))
#define bLowercase(b) ((btolower)(b))
#define bCaselessCmp(a,b) ((bstricmp)((a), (b)))
#define bCaselessNCmp(a,b,n) ((bstrnicmp)((a), (b), (n)))
#define bBase64Decode(b) (bBase64DecodeEx ((b), NULL))
#define bUuDecode(b) (bUuDecodeEx ((b), NULL))
/* Unusual functions */
extern struct bStream * bsFromBstr (const_bstring b);
extern bstring bTail (bstring b, int n);
extern bstring bHead (bstring b, int n);
extern int bSetCstrChar (bstring a, int pos, char c);
extern int bSetChar (bstring b, int pos, char c);
extern int bFill (bstring a, char c, int len);
extern int bReplicate (bstring b, int n);
extern int bReverse (bstring b);
extern int bInsertChrs (bstring b, int pos, int len, unsigned char c, unsigned char fill);
extern bstring bStrfTime (const char * fmt, const struct tm * timeptr);
#define bAscTime(t) (bStrfTime ("%c\n", (t)))
#define bCTime(t) ((t) ? bAscTime (localtime (t)) : NULL)
/* Spacing formatting */
extern int bJustifyLeft (bstring b, int space);
extern int bJustifyRight (bstring b, int width, int space);
extern int bJustifyMargin (bstring b, int width, int space);
extern int bJustifyCenter (bstring b, int width, int space);
/* Esoteric standards specific functions */
extern char * bStr2NetStr (const_bstring b);
extern bstring bNetStr2Bstr (const char * buf);
extern bstring bBase64Encode (const_bstring b);
extern bstring bBase64DecodeEx (const_bstring b, int * boolTruncError);
extern struct bStream * bsUuDecode (struct bStream * sInp, int * badlines);
extern bstring bUuDecodeEx (const_bstring src, int * badlines);
extern bstring bUuEncode (const_bstring src);
extern bstring bYEncode (const_bstring src);
extern bstring bYDecode (const_bstring src);
/* Writable stream */
typedef int (* bNwrite) (const void * buf, size_t elsize, size_t nelem, void * parm);
struct bwriteStream * bwsOpen (bNwrite writeFn, void * parm);
int bwsWriteBstr (struct bwriteStream * stream, const_bstring b);
int bwsWriteBlk (struct bwriteStream * stream, void * blk, int len);
int bwsWriteFlush (struct bwriteStream * stream);
int bwsIsEOF (const struct bwriteStream * stream);
int bwsBuffLength (struct bwriteStream * stream, int sz);
void * bwsClose (struct bwriteStream * stream);
/* Security functions */
/* bSecureDestroy(b): overwrite the whole allocated buffer of b (mlen bytes)
 * with zeros and then destroy b with bdestroy().
 * The argument is evaluated exactly once (it is copied into bstr__tmp).
 * Nothing at all happens - b is not destroyed either - when b is NULL, has
 * no data pointer, or has mlen <= 0. */
#define bSecureDestroy(b) { \
bstring bstr__tmp = (b); \
if (bstr__tmp && bstr__tmp->mlen > 0 && bstr__tmp->data) { \
(void) memset (bstr__tmp->data, 0, (size_t) bstr__tmp->mlen); \
bdestroy (bstr__tmp); \
} \
}
#define bSecureWriteProtect(t) { \
if ((t).mlen >= 0) { \
if ((t).mlen > (t).slen)) { \
(void) memset ((t).data + (t).slen, 0, (size_t) (t).mlen - (t).slen); \
} \
(t).mlen = -1; \
} \
}
extern bstring bSecureInput (int maxlen, int termchar,
bNgetc vgetchar, void * vgcCtx);
#ifdef __cplusplus
}
#endif
#endif

2974
src/cbstring/bstrlib.c Normal file

File diff suppressed because it is too large Load Diff

304
src/cbstring/bstrlib.h Normal file
View File

@ -0,0 +1,304 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
* BSD open source license or GPL v2.0. Refer to the accompanying documentation
* for details on usage and license.
*/
/*
* bstrlib.h
*
* This file is the header file for the core module for implementing the
* bstring functions.
*/
#ifndef BSTRLIB_INCLUDE
#define BSTRLIB_INCLUDE
#ifdef __cplusplus
extern "C" {
#endif
#include <stdarg.h>
#include <string.h>
#include <limits.h>
#include <ctype.h>
#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP)
# if defined (__TURBOC__) && !defined (__BORLANDC__)
# define BSTRLIB_NOVSNP
# endif
#endif
#define BSTR_ERR (-1)
#define BSTR_OK (0)
#define BSTR_BS_BUFF_LENGTH_GET (0)
typedef struct tagbstring * bstring;
typedef const struct tagbstring * const_bstring;
/* Copy functions */
#define cstr2bstr bfromcstr
extern bstring bfromcstr (const char * str);
extern bstring bfromcstralloc (int mlen, const char * str);
extern bstring blk2bstr (const void * blk, int len);
extern char * bstr2cstr (const_bstring s, char z);
extern int bcstrfree (char * s);
extern bstring bstrcpy (const_bstring b1);
extern int bassign (bstring a, const_bstring b);
extern int bassignmidstr (bstring a, const_bstring b, int left, int len);
extern int bassigncstr (bstring a, const char * str);
extern int bassignblk (bstring a, const void * s, int len);
/* Destroy function */
extern int bdestroy (bstring b);
/* Space allocation hinting functions */
extern int balloc (bstring s, int len);
extern int ballocmin (bstring b, int len);
/* Substring extraction */
extern bstring bmidstr (const_bstring b, int left, int len);
/* Various standard manipulations */
extern int bconcat (bstring b0, const_bstring b1);
extern int bconchar (bstring b0, char c);
extern int bcatcstr (bstring b, const char * s);
extern int bcatblk (bstring b, const void * s, int len);
extern int binsert (bstring s1, int pos, const_bstring s2, unsigned char fill);
extern int binsertch (bstring s1, int pos, int len, unsigned char fill);
extern int breplace (bstring b1, int pos, int len, const_bstring b2, unsigned char fill);
extern int bdelete (bstring s1, int pos, int len);
extern int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill);
extern int btrunc (bstring b, int n);
/* Scan/search functions */
extern int bstricmp (const_bstring b0, const_bstring b1);
extern int bstrnicmp (const_bstring b0, const_bstring b1, int n);
extern int biseqcaseless (const_bstring b0, const_bstring b1);
extern int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len);
extern int biseq (const_bstring b0, const_bstring b1);
extern int bisstemeqblk (const_bstring b0, const void * blk, int len);
extern int biseqcstr (const_bstring b, const char * s);
extern int biseqcstrcaseless (const_bstring b, const char * s);
extern int bstrcmp (const_bstring b0, const_bstring b1);
extern int bstrncmp (const_bstring b0, const_bstring b1, int n);
extern int binstr (const_bstring s1, int pos, const_bstring s2);
extern int binstrr (const_bstring s1, int pos, const_bstring s2);
extern int binstrcaseless (const_bstring s1, int pos, const_bstring s2);
extern int binstrrcaseless (const_bstring s1, int pos, const_bstring s2);
extern int bstrchrp (const_bstring b, int c, int pos);
extern int bstrrchrp (const_bstring b, int c, int pos);
#define bstrchr(b,c) bstrchrp ((b), (c), 0)
#define bstrrchr(b,c) bstrrchrp ((b), (c), blength(b)-1)
extern int binchr (const_bstring b0, int pos, const_bstring b1);
extern int binchrr (const_bstring b0, int pos, const_bstring b1);
extern int bninchr (const_bstring b0, int pos, const_bstring b1);
extern int bninchrr (const_bstring b0, int pos, const_bstring b1);
extern int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos);
extern int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos);
/* List of string container functions */
struct bstrList {
int qty, mlen;
bstring * entry;
};
extern struct bstrList * bstrListCreate (void);
extern int bstrListDestroy (struct bstrList * sl);
extern int bstrListAlloc (struct bstrList * sl, int msz);
extern int bstrListAllocMin (struct bstrList * sl, int msz);
/* String split and join functions */
extern struct bstrList * bsplit (const_bstring str, unsigned char splitChar);
extern struct bstrList * bsplits (const_bstring str, const_bstring splitStr);
extern struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr);
extern bstring bjoin (const struct bstrList * bl, const_bstring sep);
extern int bsplitcb (const_bstring str, unsigned char splitChar, int pos,
int (* cb) (void * parm, int ofs, int len), void * parm);
extern int bsplitscb (const_bstring str, const_bstring splitStr, int pos,
int (* cb) (void * parm, int ofs, int len), void * parm);
extern int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos,
int (* cb) (void * parm, int ofs, int len), void * parm);
/* Miscellaneous functions */
extern int bpattern (bstring b, int len);
extern int btoupper (bstring b);
extern int btolower (bstring b);
extern int bltrimws (bstring b);
extern int brtrimws (bstring b);
extern int btrimws (bstring b);
/* <*>printf format functions */
#if !defined (BSTRLIB_NOVSNP)
extern bstring bformat (const char * fmt, ...);
extern int bformata (bstring b, const char * fmt, ...);
extern int bassignformat (bstring b, const char * fmt, ...);
extern int bvcformata (bstring b, int count, const char * fmt, va_list arglist);
/* bvformata(ret, b, fmt, lastarg): append formatted output to b from inside a
 * variadic function. lastarg is handed to va_start, so it has to be the last
 * named parameter of the enclosing function.
 * bvcformata is called repeatedly, starting with a size guess of 16:
 *   - a result >= 0 ends the loop with ret = BSTR_OK;
 *   - a negative result whose magnitude does not exceed the current guess is
 *     treated as a real error and ends the loop with ret = BSTR_ERR;
 *   - any other negative result is negated and used as the next size guess.
 * The variable argument list is restarted with va_start/va_end on every
 * attempt. */
#define bvformata(ret, b, fmt, lastarg) { \
bstring bstrtmp_b = (b); \
const char * bstrtmp_fmt = (fmt); \
int bstrtmp_r = BSTR_ERR, bstrtmp_sz = 16; \
for (;;) { \
va_list bstrtmp_arglist; \
va_start (bstrtmp_arglist, lastarg); \
bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \
va_end (bstrtmp_arglist); \
if (bstrtmp_r >= 0) { /* Everything went ok */ \
bstrtmp_r = BSTR_OK; \
break; \
} else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? */ \
bstrtmp_r = BSTR_ERR; \
break; \
} \
bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \
} \
ret = bstrtmp_r; \
}
#endif
typedef int (*bNgetc) (void *parm);
typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, void *parm);
/* Input functions */
extern bstring bgets (bNgetc getcPtr, void * parm, char terminator);
extern bstring bread (bNread readPtr, void * parm);
extern int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator);
extern int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator);
extern int breada (bstring b, bNread readPtr, void * parm);
/* Stream functions */
extern struct bStream * bsopen (bNread readPtr, void * parm);
extern void * bsclose (struct bStream * s);
extern int bsbufflength (struct bStream * s, int sz);
extern int bsreadln (bstring b, struct bStream * s, char terminator);
extern int bsreadlns (bstring r, struct bStream * s, const_bstring term);
extern int bsread (bstring b, struct bStream * s, int n);
extern int bsreadlna (bstring b, struct bStream * s, char terminator);
extern int bsreadlnsa (bstring r, struct bStream * s, const_bstring term);
extern int bsreada (bstring b, struct bStream * s, int n);
extern int bsunread (struct bStream * s, const_bstring b);
extern int bspeek (bstring r, const struct bStream * s);
extern int bssplitscb (struct bStream * s, const_bstring splitStr,
int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
extern int bssplitstrcb (struct bStream * s, const_bstring splitStr,
int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
extern int bseof (const struct bStream * s);
struct tagbstring {
int mlen;
int slen;
unsigned char * data;
};
/* Accessor macros */
#define blengthe(b, e) (((b) == (void *)0 || (b)->slen < 0) ? (int)(e) : ((b)->slen))
#define blength(b) (blengthe ((b), 0))
#define bdataofse(b, o, e) (((b) == (void *)0 || (b)->data == (void*)0) ? (char *)(e) : ((char *)(b)->data) + (o))
#define bdataofs(b, o) (bdataofse ((b), (o), (void *)0))
#define bdatae(b, e) (bdataofse (b, 0, e))
#define bdata(b) (bdataofs (b, 0))
#define bchare(b, p, e) ((((unsigned)(p)) < (unsigned)blength(b)) ? ((b)->data[(p)]) : (e))
#define bchar(b, p) bchare ((b), (p), '\0')
/* Static constant string initialization macro */
#define bsStaticMlen(q,m) {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")}
#if defined(_MSC_VER)
/* There are many versions of MSVC which emit __LINE__ as a non-constant. */
# define bsStatic(q) bsStaticMlen(q,-32)
#endif
#ifndef bsStatic
# define bsStatic(q) bsStaticMlen(q,-__LINE__)
#endif
/* Static constant block parameter pair */
#define bsStaticBlkParms(q) ((void *)("" q "")), ((int) sizeof(q)-1)
/* Reference building macros */
#define cstr2tbstr btfromcstr
#define btfromcstr(t,s) { \
(t).data = (unsigned char *) (s); \
(t).slen = ((t).data) ? ((int) (strlen) ((char *)(t).data)) : 0; \
(t).mlen = -1; \
}
#define blk2tbstr(t,s,l) { \
(t).data = (unsigned char *) (s); \
(t).slen = l; \
(t).mlen = -1; \
}
#define btfromblk(t,s,l) blk2tbstr(t,s,l)
/* bmid2tbstr(t, b, p, l): make the struct tagbstring t refer (without
 * copying) to the substring of b that starts at position p and is l
 * characters long.
 * The range is clamped to b: a negative p shortens the length by the same
 * amount and is then treated as 0, and the length is cut off at the end of
 * b. If nothing is left after clamping, or if b is NULL, has no data or has
 * a negative slen, t refers to the empty string "" with slen 0.
 * t.mlen is always set to the negative value -__LINE__, which
 * biswriteprotected() reports as write protected. */
#define bmid2tbstr(t,b,p,l) { \
const_bstring bstrtmp_s = (b); \
if (bstrtmp_s && bstrtmp_s->data && bstrtmp_s->slen >= 0) { \
int bstrtmp_left = (p); \
int bstrtmp_len = (l); \
if (bstrtmp_left < 0) { \
bstrtmp_len += bstrtmp_left; \
bstrtmp_left = 0; \
} \
if (bstrtmp_len > bstrtmp_s->slen - bstrtmp_left) \
bstrtmp_len = bstrtmp_s->slen - bstrtmp_left; \
if (bstrtmp_len <= 0) { \
(t).data = (unsigned char *)""; \
(t).slen = 0; \
} else { \
(t).data = bstrtmp_s->data + bstrtmp_left; \
(t).slen = bstrtmp_len; \
} \
} else { \
(t).data = (unsigned char *)""; \
(t).slen = 0; \
} \
(t).mlen = -__LINE__; \
}
/* btfromblkltrimws(t, s, l): make the struct tagbstring t refer (without
 * copying) to the block s of length l with its leading whitespace, as
 * classified by isspace(), skipped.
 * When s is NULL or l is negative no scanning takes place; t.data is then s
 * and t.slen is l, unchanged.
 * t.mlen is set to the negative value -__LINE__. */
#define btfromblkltrimws(t,s,l) { \
int bstrtmp_idx = 0, bstrtmp_len = (l); \
unsigned char * bstrtmp_s = (s); \
if (bstrtmp_s && bstrtmp_len >= 0) { \
for (; bstrtmp_idx < bstrtmp_len; bstrtmp_idx++) { \
if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \
} \
} \
(t).data = bstrtmp_s + bstrtmp_idx; \
(t).slen = bstrtmp_len - bstrtmp_idx; \
(t).mlen = -__LINE__; \
}
/* btfromblkrtrimws(t, s, l): make the struct tagbstring t refer (without
 * copying) to the block s of length l with its trailing whitespace, as
 * classified by isspace(), dropped.
 * bstrtmp_len holds the index of the last character still kept, which is
 * why it starts at l - 1 and why the resulting length is bstrtmp_len + 1.
 * When s is NULL or l is less than 1 no scanning takes place and t.slen ends
 * up equal to l.
 * t.mlen is set to the negative value -__LINE__. */
#define btfromblkrtrimws(t,s,l) { \
int bstrtmp_len = (l) - 1; \
unsigned char * bstrtmp_s = (s); \
if (bstrtmp_s && bstrtmp_len >= 0) { \
for (; bstrtmp_len >= 0; bstrtmp_len--) { \
if (!isspace (bstrtmp_s[bstrtmp_len])) break; \
} \
} \
(t).data = bstrtmp_s; \
(t).slen = bstrtmp_len + 1; \
(t).mlen = -__LINE__; \
}
/* btfromblktrimws(t, s, l): make the struct tagbstring t refer (without
 * copying) to the block s of length l with both its leading and its trailing
 * whitespace, as classified by isspace(), removed.
 * bstrtmp_idx is the index of the first character kept and bstrtmp_len the
 * index of the last one, so the resulting length is
 * bstrtmp_len + 1 - bstrtmp_idx (0 for a block made up of whitespace only).
 * When s is NULL or l is less than 1 no scanning takes place and t.slen ends
 * up equal to l.
 * t.mlen is set to the negative value -__LINE__. */
#define btfromblktrimws(t,s,l) { \
int bstrtmp_idx = 0, bstrtmp_len = (l) - 1; \
unsigned char * bstrtmp_s = (s); \
if (bstrtmp_s && bstrtmp_len >= 0) { \
for (; bstrtmp_idx <= bstrtmp_len; bstrtmp_idx++) { \
if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \
} \
for (; bstrtmp_len >= bstrtmp_idx; bstrtmp_len--) { \
if (!isspace (bstrtmp_s[bstrtmp_len])) break; \
} \
} \
(t).data = bstrtmp_s + bstrtmp_idx; \
(t).slen = bstrtmp_len + 1 - bstrtmp_idx; \
(t).mlen = -__LINE__; \
}
/* Write protection macros */
#define bwriteprotect(t) { if ((t).mlen >= 0) (t).mlen = -1; }
#define bwriteallow(t) { if ((t).mlen == -1) (t).mlen = (t).slen + ((t).slen == 0); }
#define biswriteprotected(t) ((t).mlen <= 0)
#ifdef __cplusplus
}
#endif
#endif

3202
src/cbstring/bstrlib.txt Normal file

File diff suppressed because it is too large Load Diff

29
src/cbstring/license.txt Normal file
View File

@ -0,0 +1,29 @@
Copyright (c) 2002-2008 Paul Hsieh
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of bstrlib nor the names of its contributors may be used
to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

172
src/cbstring/porting.txt Normal file
View File

@ -0,0 +1,172 @@
Better String library Porting Guide
-----------------------------------
by Paul Hsieh
The bstring library is an attempt to provide improved string processing
functionality to the C and C++ language. At the heart of the bstring library
is the management of "bstring"s which are a significant improvement over '\0'
terminated char buffers. See the accompanying documentation file bstrlib.txt
for more information.
===============================================================================
Identifying the Compiler
------------------------
Bstrlib has been tested on the following compilers:
Microsoft Visual C++
Watcom C/C++ (32 bit flat)
Intel's C/C++ compiler (on Windows)
The GNU C/C++ compiler (on Windows/Linux on x86 and PPC64)
Borland C++
Turbo C
There are slight differences in these compilers which require slight
differences in the implementation of Bstrlib. These are accommodated in the
same sources using #ifdef/#if defined() on compiler specific macros. To
port Bstrlib to a new compiler not listed above, it is recommended that the
same strategy be followed. If you are unaware of the compiler specific
identifying preprocessor macro for your compiler you might find it here:
http://predef.sourceforge.net/precomp.html
Note that Intel C/C++ on Windows sets the Microsoft identifier: _MSC_VER.
16-bit vs. 32-bit vs. 64-bit Systems
------------------------------------
Bstrlib has been architected to deal with strings of length between 0 and
INT_MAX (inclusive). Since the values of int are never higher than size_t
there will be no issue here. Note that on most 64-bit systems int is 32-bit.
Dependency on The C-Library
---------------------------
Bstrlib uses the functions memcpy, memmove, malloc, realloc, free and
vsnprintf. Many free standing C compiler implementations that have a mode in
which the C library is not available will typically not include these
functions which will make porting Bstrlib to it onerous. Bstrlib is not
designed for such bare bones compiler environments. This usually includes
compilers that target ROM environments.
Porting Issues
--------------
Bstrlib has been written completely in ANSI/ISO C and ISO C++, however, there
are still a few porting issues. These are described below.
1. The vsnprintf () function.
Unfortunately, the earlier ANSI/ISO C standards did not include this function.
If the compiler of interest does not support this function then the
BSTRLIB_NOVSNP should be defined via something like:
#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP)
# if defined (__TURBOC__) || defined (__COMPILERVENDORSPECIFICMACRO__)
# define BSTRLIB_NOVSNP
# endif
#endif
which appears at the top of bstrlib.h. Note that the bformat(a) functions
will not be declared or implemented if the BSTRLIB_NOVSNP macro is set. If
the compiler has renamed vsnprintf() to some other named function, then
search for the definition of the exvsnprintf macro in bstrlib.c file and be
sure it's defined appropriately:
#if defined (__COMPILERVENDORSPECIFICMACRO__)
# define exvsnprintf(r,b,n,f,a) {r=__compiler_specific_vsnprintf(b,n,f,a);}
#else
# define exvsnprintf(r,b,n,f,a) {r=vsnprintf(b,n,f,a);}
#endif
Take notice of the return value being captured in the variable r. It is
assumed that r exceeds n if and only if the underlying vsnprintf function has
determined what the true maximal output length would be for output if the
buffer were large enough to hold it. Non-modern implementations must output a
lesser number (the macro can and should be modified to ensure this).
2. Weak C++ compiler.
C++ is a much more complicated language to implement than C. This has led
to varying quality of compiler implementations. The weaknesses isolated in
the initial ports are inclusion of the Standard Template Library,
std::iostream and exception handling. By default it is assumed that the C++
compiler supports all of these things correctly. If your compiler does not
support one or more of these define the corresponding macro:
BSTRLIB_CANNOT_USE_STL
BSTRLIB_CANNOT_USE_IOSTREAM
BSTRLIB_DOESNT_THROW_EXCEPTIONS
The compiler specific detected macro should be defined at the top of
bstrwrap.h in the Configuration defines section. Note that these disabling
macros can be overridden with the associated enabling macro if a subsequent
version of the compiler gains support. (For example, it's possible to rig
up STLport to provide STL support for WATCOM C/C++, so -DBSTRLIB_CAN_USE_STL
can be passed in as a compiler option.)
3. The bsafe module, and reserved words.
The bsafe module is in gross violation of the ANSI/ISO C standard in the
sense that it redefines what could be implemented as reserved words on a
given compiler. The typical problem is that a compiler may inline some of the
functions and thus not be properly overridden by the definitions in the bsafe
module. It is also possible that a compiler may prohibit the redefinitions in
the bsafe module. Compiler specific action will be required to deal with
these situations.
Platform Specific Files
-----------------------
The makefiles for the examples are basically set up for particular
environments for each platform. In general these makefiles are not portable
and should be constructed as necessary from scratch for each platform.
Testing a port
--------------
To test that a port compiles correctly do the following:
1. Build a sample project that includes the bstrlib, bstraux, bstrwrap, and
bsafe modules.
2. Compile bstest against the bstrlib module.
3. Run bstest and ensure that 0 errors are reported.
4. Compile test against the bstrlib and bstrwrap modules.
5. Run test and ensure that 0 errors are reported.
6. Compile each of the examples (except for the "re" example, which may be
complicated and is not a real test of bstrlib and except for the mfcbench
example which is Windows specific.)
7. Run each of the examples.
The builds must have 0 errors, and should have the absolute minimum number of
warnings (in most cases can be reduced to 0.) The result of execution should
be essentially identical on each platform.
Performance
-----------
Different CPUs and compilers have different capabilities in terms of
performance. It is possible for Bstrlib to assume performance
characteristics that a platform doesn't have (since it was primarily
developed on just one platform). The goal of Bstrlib is to provide very good
performance on all platforms regardless of this but without resorting to
extreme measures (such as using assembly language, or non-portable intrinsics
or library extensions.)
There are two performance benchmarks that can be found in the example/
directory. They are: cbench.c and cppbench.cpp. These are variations and
expansions of a benchmark for another string library. They don't cover all
string functionality, but do include the most basic functions which will be
common in most string manipulation kernels.
...............................................................................
Feedback
--------
In all cases, you may email issues found to the primary author of Bstrlib at
the email address: websnarf@users.sourceforge.net
===============================================================================

221
src/cbstring/security.txt Normal file
View File

@ -0,0 +1,221 @@
Better String library Security Statement
----------------------------------------
by Paul Hsieh
===============================================================================
Introduction
------------
The Better String library (hereafter referred to as Bstrlib) is an attempt to
provide improved string processing functionality to the C and C++ languages.
At the heart of the Bstrlib is the management of "bstring"s which are a
significant improvement over '\0' terminated char buffers. See the
accompanying documentation file bstrlib.txt for more information.
DISCLAIMER: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Like any software, there is always a possibility of failure due to a flawed
implementation. Nevertheless a good faith effort has been made to minimize
such flaws in Bstrlib. Also, use of Bstrlib by itself will not make an
application secure or free from implementation failures. However, it is the
author's conviction that use of Bstrlib can greatly facilitate the creation
of software meeting the highest possible standards of security.
Part of the reason why this document has been created, is for the purpose of
security auditing, or the creation of further "Statements on Security" for
software that is created that uses Bstrlib. An auditor may check the claims
below against Bstrlib, and use this as a basis for analysis of software which
uses Bstrlib.
===============================================================================
Statement on Security
---------------------
This is a document intended to give consumers of the Better String Library
who are interested in security an idea of where the Better String Library
stands on various security issues. Any deviation observed in the actual
library itself from the descriptions below should be considered an
implementation error, not a design flaw.
This statement is not an analytical proof of correctness or an outline of one
but rather an assertion similar to a scientific claim or hypothesis. By use,
testing and open independent examination (otherwise known as scientific
falsifiability), the credibility of the claims made below can rise to the
level of an established theory.
Common security issues:
.......................
1. Buffer Overflows
The Bstrlib API allows the programmer a way to deal with strings without
having to deal with the buffers containing them. Ordinary usage of the
Bstrlib API itself makes buffer overflows impossible.
Furthermore, the Bstrlib API has a superset of basic string functionality as
compared to the C library's char * functions, C++'s std::string class and
Microsoft's MFC based CString class. It also has abstracted mechanisms for
dealing with IO. This is important as it gives developers a way of migrating
all their code from a functionality point of view.
2. Memory size overflow/wrap around attack
Bstrlib is, by design, impervious to memory size overflow attacks. The
reason it is resilient to length overflows is that bstring lengths are
bounded above by INT_MAX, instead of ~(size_t)0. So length addition
overflows cause a wrap around of the integer value making them negative
causing balloc() to fail before an erroneous operation can occur. Attempted
conversions of char * strings which may have lengths greater than INT_MAX are
detected and the conversion is aborted.
It is unknown if this property holds on machines that don't represent
integers as 2s complement. It is recommended that Bstrlib be carefully
audited by anyone using a system which is not 2s complement based.
3. Constant string protection
Bstrlib implements runtime enforced constant and read-only string semantics.
I.e., bstrings which are declared as constant via the bsStatic() macro cannot
be modified or deallocated directly through the Bstrlib API, and this cannot
be subverted by casting or other type coercion. This is independent of the
use of the const_bstring data type.
The Bstrlib C API uses the type const_bstring to specify bstring parameters
whose contents do not change. Although the C language cannot enforce this,
this is nevertheless guaranteed by the implementation of the Bstrlib library
of C functions. The C++ API enforces the const attribute on CBString types
correctly.
4. Aliased bstring support
Bstrlib detects and supports aliased parameter management throughout the API.
The kind of aliasing that is allowed is the one where pointers of the same
basic type may be pointing to overlapping objects (this is the assumption the
ANSI C99 specification makes.) Each function behaves as if all read-only
parameters were copied to temporaries which are used in their stead before
the function is enacted (it rarely actually does this). No function in the
Bstrlib uses the "restrict" parameter attribute from the ANSI C99
specification.
5. Information leaking
In bstraux.h, using the semantically equivalent macros bSecureDestroy() and
bSecureWriteProtect() in place of bdestroy() and bwriteprotect() respectively
will ensure that stale data does not linger in the heap's free space after
strings have been released back to memory. Created bstrings or CBStrings
are not linked to anything external to themselves, and thus cannot expose
deterministic data leaking. If a bstring is resized, the preimage may exist
as a copy that is released to the heap. Thus for sensitive data, the bstring
should be sufficiently presized before manipulated so that it is not resized.
bSecureInput() has been supplied in bstraux.c, which can be used to obtain
input securely without any risk of leaving any part of the input image in the
heap except for the allocated bstring that is returned.
6. Memory leaking
Bstrlib can be built using memdbg.h enabled via the BSTRLIB_MEMORY_DEBUG
macro. User generated definitions for malloc, realloc and free can then be
supplied which can implement special strategies for memory corruption
detection or memory leaking. Otherwise, bstrlib does not do anything out of
the ordinary to attempt to deal with the standard problem of memory leaking
(i.e., losing references to allocated memory) when programming in the C and
C++ languages. However, it does not compound the problem any more than exists
either, as it doesn't have any intrinsic inescapable leaks in it. Bstrlib
does not preclude the use of automatic garbage collection mechanisms such as
the Boehm garbage collector.
7. Encryption
Bstrlib does not present any built-in encryption mechanism. However, it
supports full binary contents in its data buffers, so any standard block
based encryption mechanism can make direct use of bstrings/CBStrings for
buffer management.
8. Double freeing
Freeing a pointer that is already free is an extremely rare, but nevertheless
a potentially ruthlessly corrupting operation (it's possible to cause Win 98 to
reboot, by calling free multiple times on already freed data using the WATCOM
CRT.) Bstrlib invalidates the bstring header data before freeing, so that in
many cases a double free will be detected and an error will be reported
(though this behaviour is not guaranteed and should not be relied on).
Using bstrFree pervasively (instead of bdestroy) can lead to somewhat
improved invalid free avoidance (it is completely safe whenever bstring
instances are only stored in unique variables). For example:
struct tagbstring hw = bsStatic ("Hello, world");
bstring cpHw = bstrcpy (&hw);
#ifdef NOT_QUITE_AS_SAFE
bdestroy (cpHw); /* Never fail */
bdestroy (cpHw); /* Error sometimes detected at runtime */
bdestroy (&hw); /* Error detected at run time */
#else
bstrFree (cpHw); /* Never fail */
bstrFree (cpHw); /* Will do nothing */
bstrFree (&hw); /* Will lead to a compile time error */
#endif
9. Resource based denial of service
bSecureInput() has been supplied in bstraux.c. It has an optional upper limit
for input length. But unlike fgets(), it is also easily determined if the
buffer has been truncated early. In this way, a program can set an upper limit
on input sizes while still allowing for implementing context specific
truncation semantics (i.e., does the program consume but dump the extra
input, or does it consume it in later inputs?)
10. Mixing char *'s and bstrings
The bstring and char * representations are not identical. So there is a risk
when converting back and forth that data may be lost. Essentially bstrings can
contain '\0' as a valid non-terminating character, while char * strings
cannot and in fact must use the character as a terminator. The risk of data
loss is very low, since:
A) the simple method of only using bstrings in a char * semantically
compatible way is both easy to achieve and pervasively supported.
B) obtaining '\0' content in a string is either deliberate or indicative
of another, likely more serious problem in the code.
C) the library comes with various functions which deal with this issue
(namely: bfromcstr(), bstr2cstr (), and bSetCstrChar ())
Marginal security issues:
.........................
11. 8-bit versus 9-bit portability
Bstrlib uses CHAR_BIT and other limits.h constants to the maximum extent
possible to avoid portability problems. However, Bstrlib has not been tested
on any system that does not represent char as 8-bits. So whether or not it
works on 9-bit systems is an open question. It is recommended that Bstrlib be
carefully audited by anyone using a system in which CHAR_BIT is not 8.
12. EBCDIC/ASCII/UTF-8 data representation attacks.
Bstrlib uses ctype.h functions to ensure that it remains portable to non-
ASCII systems. It also checks range to make sure it is well defined even for
data that ANSI does not define for the ctype functions.
Obscure issues:
...............
13. Data attributes
There is no support for a Perl-like "taint" attribute; however, an example of
how to do this using C++'s type system is provided.

1639
src/decode.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,163 @@
#pragma once
#include <set>
#include <map>
#include <utility>
#include <vector>
#include <memory>
#ifdef __APPLE__
#include <tr1/memory>
#endif
#include <stdint.h>
struct Instruction;
class Operand;
namespace HLSLcc
{
#ifdef __APPLE__
// Apple builds take shared_ptr from std::tr1 (see the <tr1/memory> include above)
using namespace std::tr1;
#else
using namespace std;
#endif
namespace ControlFlow
{
class BasicBlock;
// Control flow graph of a shader program: owns every BasicBlock and indexes
// them by the instruction that starts each block.
class ControlFlowGraph
{
    friend class BasicBlock;

public:
    // Owning storage type for the blocks of this graph.
    typedef std::vector<shared_ptr<BasicBlock> > BasicBlockStorage;

    // Creates an empty graph; call Build() to populate it.
    ControlFlowGraph() : m_BlockMap(), m_BlockStorage() {}

    // Builds the graph starting from firstInstruction and returns a reference to
    // one of its blocks (presumably the entry block - confirm against the implementation).
    const BasicBlock &Build(const Instruction *firstInstruction);

    // Block lookup. Only works for instructions that start a basic block.
    const BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction) const;

    // Same lookup, non-const flavour for use by BasicBlock.
    BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction);

    // Read-only access to every block stored in this graph.
    const BasicBlockStorage &AllBlocks() const
    {
        return m_BlockStorage;
    }

private:
    // Lookup table for the created basic blocks, keyed by the pointer to the first instruction of each block.
    typedef std::map<const Instruction *, BasicBlock *> BasicBlockMap;
    BasicBlockMap m_BlockMap;

    // Smart-pointer storage for the BasicBlocks; m_BlockMap above only holds raw pointers into these.
    BasicBlockStorage m_BlockStorage;
};
// One basic block of the control flow graph, together with the data-flow sets
// (upwards-exposed variables, killed variables, reachable definitions) computed for it.
class BasicBlock
{
    friend class ControlFlowGraph;

public:
    // A set of register indices, one per each vec4 component per register
    typedef std::set<uint32_t> RegisterSet;

    // The connections (either incoming or outgoing) from this block. The instruction is the same one as the key in ControlFlowGraph to that basic block
    typedef std::set<const Instruction *> ConnectionSet;

    // A single definition site: the instruction and the operand within it.
    struct Definition
    {
        Definition(const Instruction *i = NULL, const Operand *o = NULL)
            : m_Instruction(i)
            , m_Operand(o)
        {}

        Definition(const Definition &a)
            : m_Instruction(a.m_Instruction)
            , m_Operand(a.m_Operand)
        {}

        // Two definitions are equal only when both the instruction and the operand match.
        bool operator==(const Definition &a) const
        {
            if (a.m_Instruction != m_Instruction)
                return false;
            return a.m_Operand == m_Operand;
        }

        // Exact negation of operator==. The previous hand-written version returned
        // false whenever only one of the two pointers differed, so it disagreed with operator==.
        bool operator!=(const Definition &a) const
        {
            return !(*this == a);
        }

        // Strict weak ordering (instruction first, then operand) so Definition can live in std::set.
        bool operator<(const Definition &a) const
        {
            if (m_Instruction != a.m_Instruction)
                return m_Instruction < a.m_Instruction;
            return m_Operand < a.m_Operand;
        }

        const Instruction *m_Instruction;
        const Operand *m_Operand;
    };

    typedef std::set<Definition> ReachableDefinitionsPerVariable; // A set of possibly visible definitions for one component of one vec4 variable
    typedef std::map<uint32_t, ReachableDefinitionsPerVariable> ReachableVariables; // A ReachableDefinitionsPerVariable for each variable*component.

    // Read-only accessors for the block boundaries and the computed sets (see the member comments below).
    const Instruction *First() const { return m_First; }
    const Instruction *Last() const { return m_Last; }
    const RegisterSet &UEVar() const { return m_UEVar; }
    const RegisterSet &VarKill() const { return m_VarKill; }
    const ConnectionSet &Preceding() const { return m_Preceding; }
    const ConnectionSet &Succeeding() const { return m_Succeeding; }
    const ReachableVariables &DEDef() const { return m_DEDef; }
    const ReachableVariables &Reachable() const { return m_Reachable; }

    // Helper function: Do union of 2 ReachableVariables, store result in a.
    static void RVarUnion(ReachableVariables &a, const ReachableVariables &b);

private:
    // Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build()
    BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead);

    // Walk through the instructions and build UEVar and VarKill sets, create succeeding nodes if they don't exist already.
    void Build();

    bool RebuildReachable(); // Rebuild m_Reachable from preceding blocks and this one. Returns true if current value changed.

    BasicBlock * AddChildBasicBlock(const Instruction *psFirst);

private:
    ControlFlowGraph &m_Graph; // The graph object containing this block
    const Instruction *m_First; // The first instruction in the basic block
    const Instruction *m_Last; // The last instruction in the basic block. Either OPCODE_RET or a branch/jump/loop instruction
    RegisterSet m_UEVar; // Upwards-exposed variables (temps that need definition from upstream and are used in this basic block)
    RegisterSet m_VarKill; // Set of variables that are defined in this block.
    ConnectionSet m_Preceding; // Set of blocks that immediately precede this block in the CFG
    ConnectionSet m_Succeeding; // Set of blocks that follow this block in the CFG
    ReachableVariables m_DEDef; // Downward-exposed definitions from this basic block. Always only one item per set.
    ReachableVariables m_Reachable; // The set of variable definitions that are visible at the end of this block.
};
};
};

View File

@ -0,0 +1,31 @@
#pragma once
struct Instruction;
namespace HLSLcc
{
namespace ControlFlow
{
// Static helpers for navigating flow-control instructions while building the control flow graph.
class Utils
{
public:
// For a given flow-control instruction, find the corresponding jump location:
// If the input is OPCODE_IF, then find the next same-level ELSE or ENDIF +1
// For ELSE, find same level ENDIF + 1
// For BREAK/BREAKC, find next ENDLOOP or ENDSWITCH + 1
// For SWITCH, find next same-level CASE/DEFAULT (skip multiple consecutive case/default labels) or ENDSWITCH + 1
// For ENDLOOP, find previous same-level LOOP + 1
// For CASE/DEFAULT, find next same-level CASE/DEFAULT or ENDSWITCH + 1, skip multiple consecutive case/default labels
// For CONTINUE/C the previous LOOP + 1
// Note that LOOP/ENDSWITCH itself is nothing but a label but it still starts a new basic block.
// Note that CASE labels fall through.
// Always returns the beginning of the next block, so skip multiple CASE/DEFAULT labels etc.
// If sawEndSwitch != null, it will be set to true if the label skipping went past an ENDSWITCH
// If needConnectToParent != null, will be set to true if sawEndSwitch == true and there are one or more case labels directly before it.
static const Instruction * GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch = 0, bool *needConnectToParent = 0);
// Presumably returns the first instruction at or after psStart that is not a label
// (CASE/DEFAULT etc.), reporting through the optional sawEndSwitch out-parameter
// in the same way as GetJumpPoint - confirm against the implementation.
static const Instruction *GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch = 0);
};
}
}

View File

@ -0,0 +1,15 @@
#pragma once
#include "include/ShaderInfo.h"
#include <vector>
class HLSLCrossCompilerContext;
struct Instruction;
namespace HLSLcc
{
namespace DataTypeAnalysis
{
// Entry point of the temp register data type analysis.
// Takes the instruction list and the number of temp registers and fills `results`
// with SHADER_VARIABLE_TYPE values (presumably one entry per temp register
// component - confirm against the implementation).
void SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector<Instruction> &instructions, uint32_t ui32TempCount, std::vector<SHADER_VARIABLE_TYPE> &results);
};
};

View File

@ -0,0 +1,101 @@
#pragma once
#include <vector>
#include <set>
#include "internal_includes/tokens.h"
#include "internal_includes/Operand.h"
// One four-component entry of an immediate constant buffer, kept as raw 32-bit words.
struct ICBVec4_TAG
{
    uint32_t a;
    uint32_t b;
    uint32_t c;
    uint32_t d;
};
typedef ICBVec4_TAG ICBVec4;
#define ACCESS_FLAG_READ 0x1
#define ACCESS_FLAG_WRITE 0x2
// A single decoded declaration. Which of the payload members below are
// meaningful depends on eOpcode.
struct Declaration
{
    // Every scalar and plain-struct member is given a defined (zero) value so that
    // fields a particular opcode does not fill in never hold indeterminate data.
    // The initializer order follows the member declaration order.
    Declaration()
        : eOpcode(OPCODE_INVALID)
        , ui32NumOperands(0)
        , value()
        , ui32BufferStride(0)
        , sTGSM()
        , sIdxTemp()
        , ui32TableLength(0)
        , ui32IsShadowTex(0)
    {}

    OPCODE_TYPE eOpcode;

    // Number of valid entries in asOperands.
    uint32_t ui32NumOperands;

    Operand asOperands[2];

    std::vector<ICBVec4> asImmediateConstBuffer;

    //The declaration can set one of these
    //values depending on the opcode.
    union {
        uint32_t ui32GlobalFlags;
        uint32_t ui32NumTemps;
        RESOURCE_DIMENSION eResourceDimension;
        INTERPOLATION_MODE eInterpolation;
        PRIMITIVE_TOPOLOGY eOutputPrimitiveTopology;
        PRIMITIVE eInputPrimitive;
        uint32_t ui32MaxOutputVertexCount;
        TESSELLATOR_DOMAIN eTessDomain;
        TESSELLATOR_PARTITIONING eTessPartitioning;
        TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim;
        uint32_t aui32WorkGroupSize[3];
        uint32_t ui32HullPhaseInstanceCount;
        float fMaxTessFactor;
        uint32_t ui32IndexRange;
        uint32_t ui32GSInstanceCount;

        struct Interface_TAG
        {
            uint32_t ui32InterfaceID;
            uint32_t ui32NumFuncTables;
            uint32_t ui32ArraySize;
        } interface;
    } value;

    uint32_t ui32BufferStride;

    // UAV payload; self-initialising through its own constructor.
    struct UAV_TAG
    {
        UAV_TAG() :
            ui32GloballyCoherentAccess(0),
            bCounter(0),
            Type(RETURN_TYPE_UNORM),
            ui32NumComponents(0),
            ui32AccessFlags(0)
        {
        }
        uint32_t ui32GloballyCoherentAccess;
        uint8_t bCounter;
        RESOURCE_RETURN_TYPE Type;
        uint32_t ui32NumComponents;
        uint32_t ui32AccessFlags;
    } sUAV;

    struct TGSM_TAG
    {
        uint32_t ui32Stride;
        uint32_t ui32Count;
    } sTGSM;

    struct IndexableTemp_TAG
    {
        uint32_t ui32RegIndex;
        uint32_t ui32RegCount;
        uint32_t ui32RegComponentSize;
    } sIdxTemp;

    uint32_t ui32TableLength;

    uint32_t ui32IsShadowTex;

    // Set indexed by sampler register number.
    std::set<uint32_t> samplersUsed;
};

View File

@ -0,0 +1,50 @@
#pragma once
#include <stdint.h>
#include <string>
#include "bstrlib.h"
class Shader;
class GLSLCrossDependencyData;
class ShaderPhase;
class Translator;
class Operand;
class HLSLccReflection;
// Mutable state shared by the translation of one shader: output buffers,
// the shader being translated, the active translator and the reflection callbacks.
class HLSLCrossCompilerContext
{
public:
    // Binds the reflection callbacks and value-initialises every other member
    // (null pointers, zero counters/flags) so that nothing is read while indeterminate
    // before the caller fills it in. The initializer order follows the member declaration order.
    HLSLCrossCompilerContext(HLSLccReflection &refl)
        : glsl()
        , extensions()
        , currentGLSLString()
        , currentPhase(0)
        , indent(0)
        , flags(0)
        , psShader()
        , psDependencies()
        , inputPrefix()
        , outputPrefix()
        , psTranslator()
        , m_Reflection(refl)
    {}

    bstring glsl;
    bstring extensions;

    bstring* currentGLSLString;//either glsl or earlyMain of current phase

    uint32_t currentPhase;

    int indent;
    unsigned int flags;

    Shader* psShader;
    GLSLCrossDependencyData* psDependencies;

    const char *inputPrefix; // Prefix for shader inputs
    const char *outputPrefix; // Prefix for shader outputs

    void DoDataTypeAnalysis(ShaderPhase *psPhase);

    void ClearDependencyData();

    void AddIndentation();

    // Currently active translator
    Translator *psTranslator;

    HLSLccReflection &m_Reflection; // Callbacks for bindings and diagnostic info

    // Retrieve the name for which the input or output is declared as. Takes into account possible redirections.
    std::string GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const;
    std::string GetDeclaredOutputName(const Operand* psOperand, int* stream, uint32_t *puiIgnoreSwizzle, int *piRebase, int iIgnoreRedirect) const;

    bool OutputNeedsDeclaring(const Operand* psOperand, const int count);
};

View File

@ -0,0 +1,127 @@
#pragma once
#include "hlslcc.h"
#include "bstrlib.h"
#include <vector>
#include <string>
#include "internal_includes/Instruction.h"
#include "internal_includes/Operand.h"
class HLSLCrossCompilerContext;
namespace HLSLcc
{
uint32_t GetNumberBitsSet(uint32_t a);
uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType);
SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags);
const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision = true);
const char * GetConstructorForTypeGLSL(const SHADER_VARIABLE_TYPE eType,
const int components, bool useGLSLPrecision);
const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType,
const int components);
std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows);
void AddSwizzleUsingElementCount(bstring dest, uint32_t count);
int WriteMaskToComponentCount(uint32_t writeMask);
uint32_t BuildComponentMaskFromElementCount(int count);
// Returns true if we can do direct assignment between types (mostly for mediump<->highp floats etc)
bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src);
// Convert resource return type to SVT_ flags
uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType);
SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec);
uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount);
bool IsOperationCommutative(int /* OPCODE_TYPE */ eOpCode);
bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB);
int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim);
SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b);
// Returns true if the instruction adds 1 to the destination temp register
bool IsAddOneInstruction(const Instruction *psInst);
bool CanDoDirectCast(SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest);
// Helper function to print floats with full precision
void PrintFloat(bstring b, float f);
// Flags for ForeachOperand
// Process suboperands
#define FEO_FLAG_SUBOPERAND 1
// Process src operands
#define FEO_FLAG_SRC_OPERAND 2
// Process destination operands
#define FEO_FLAG_DEST_OPERAND 4
// Convenience: Process all operands, both src and dest, and all suboperands
#define FEO_FLAG_ALL (FEO_FLAG_SUBOPERAND | FEO_FLAG_SRC_OPERAND | FEO_FLAG_DEST_OPERAND)
// Visits the operands of every instruction in [_begin, _end) and invokes
// callback(instructionIterator, operandPointer, kind) for each selected one.
// `flags` is a combination of the FEO_FLAG_* values above; `kind` is the single
// FEO_FLAG_* value describing the operand being reported.
// Per instruction, destination operands ([0, ui32FirstSrc)) are visited before
// source operands ([ui32FirstSrc, ui32NumOperands)), and the suboperands of an
// operand are reported before the operand itself.
template<typename ItrType, typename F> void ForEachOperand(ItrType _begin, ItrType _end, int flags, F callback)
{
    const bool wantSubOperands = (flags & FEO_FLAG_SUBOPERAND) != 0;
    const bool wantDest = (flags & FEO_FLAG_DEST_OPERAND) != 0;
    const bool wantSrc = (flags & FEO_FLAG_SRC_OPERAND) != 0;

    for (ItrType inst = _begin; inst != _end; inst++)
    {
        // Destination operands
        if (wantDest || wantSubOperands)
        {
            for (uint32_t op = 0; op < inst->ui32FirstSrc; op++)
            {
                if (wantSubOperands)
                {
                    for (uint32_t sub = 0; sub < MAX_SUB_OPERANDS; sub++)
                    {
                        if (!inst->asOperands[op].m_SubOperands[sub].get())
                            continue; // empty suboperand slot
                        callback(inst, inst->asOperands[op].m_SubOperands[sub].get(), FEO_FLAG_SUBOPERAND);
                    }
                }
                if (wantDest)
                    callback(inst, &inst->asOperands[op], FEO_FLAG_DEST_OPERAND);
            }
        }

        // Source operands
        if (wantSrc || wantSubOperands)
        {
            for (uint32_t op = inst->ui32FirstSrc; op < inst->ui32NumOperands; op++)
            {
                if (wantSubOperands)
                {
                    for (uint32_t sub = 0; sub < MAX_SUB_OPERANDS; sub++)
                    {
                        if (!inst->asOperands[op].m_SubOperands[sub].get())
                            continue; // empty suboperand slot
                        callback(inst, inst->asOperands[op].m_SubOperands[sub].get(), FEO_FLAG_SUBOPERAND);
                    }
                }
                if (wantSrc)
                    callback(inst, &inst->asOperands[op], FEO_FLAG_SRC_OPERAND);
            }
        }
    }
}
};

View File

@ -0,0 +1,134 @@
#pragma once
#include "internal_includes/Operand.h"
#include "internal_includes/tokens.h"
#include "include/ShaderInfo.h"
#include <memory>
#define ATOMIC_ADDRESS_BASIC 0
#define ATOMIC_ADDRESS_ARRAY_DYNAMIC 1
#define ATOMIC_ADDRESS_STRUCT_DYNAMIC 2
#define TEXSMP_FLAG_NONE 0x0
#define TEXSMP_FLAG_LOD 0x1 //LOD comes from operand
#define TEXSMP_FLAG_DEPTHCOMPARE 0x2
#define TEXSMP_FLAG_FIRSTLOD 0x4 //LOD is 0
#define TEXSMP_FLAG_BIAS 0x8
#define TEXSMP_FLAG_GRAD 0x10
//Gather specific flags
#define TEXSMP_FLAG_GATHER 0x20
#define TEXSMP_FLAG_PARAMOFFSET 0x40 //Offset comes from operand
struct Instruction
{
Instruction()
: eOpcode(OPCODE_NOP)
, eBooleanTestType(INSTRUCTION_TEST_ZERO)
, ui32NumOperands(0)
, ui32FirstSrc(0)
, m_Uses()
, m_SkipTranslation(false)
, m_InductorRegister(0)
, bSaturate(0)
{
m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0;
}
// For creating unit tests only. Create an instruction with temps (unless reg is 0xffffffff in which case use OPERAND_TYPE_INPUT/OUTPUT)
Instruction(uint64_t _id, OPCODE_TYPE opcode, uint32_t reg1 = 0, uint32_t reg1Mask = 0, uint32_t reg2 = 0, uint32_t reg2Mask = 0, uint32_t reg3 = 0, uint32_t reg3Mask = 0, uint32_t reg4 = 0, uint32_t reg4Mask = 0)
{
id = _id;
eOpcode = opcode;
eBooleanTestType = INSTRUCTION_TEST_ZERO;
ui32FirstSrc = 0;
ui32NumOperands = 0;
m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0;
m_SkipTranslation = false;
m_InductorRegister = 0;
if (reg1Mask == 0)
return;
ui32NumOperands++;
asOperands[0].eType = reg1 == 0xffffffff ? OPERAND_TYPE_OUTPUT : OPERAND_TYPE_TEMP;
asOperands[0].ui32RegisterNumber = reg1 == 0xffffffff ? 0 : reg1;
asOperands[0].ui32CompMask = reg1Mask;
asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
if (reg2Mask == 0)
return;
ui32FirstSrc = 1;
ui32NumOperands++;
asOperands[1].eType = reg2 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP;
asOperands[1].ui32RegisterNumber = reg2 == 0xffffffff ? 0 : reg2;
asOperands[1].ui32CompMask = reg2Mask;
asOperands[1].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
if (reg3Mask == 0)
return;
ui32NumOperands++;
asOperands[2].eType = reg3 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP;
asOperands[2].ui32RegisterNumber = reg3 == 0xffffffff ? 0 : reg3;
asOperands[2].ui32CompMask = reg3Mask;
asOperands[2].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
if (reg4Mask == 0)
return;
ui32NumOperands++;
asOperands[3].eType = reg4 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP;
asOperands[3].ui32RegisterNumber = reg4 == 0xffffffff ? 0 : reg4;
asOperands[3].ui32CompMask = reg4Mask;
asOperands[3].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
}
bool IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const;
// Flags for ChangeOperandTempRegister
#define UD_CHANGE_SUBOPERANDS 1
#define UD_CHANGE_MAIN_OPERAND 2
#define UD_CHANGE_ALL 3
void ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase);
OPCODE_TYPE eOpcode;
INSTRUCTION_TEST_BOOLEAN eBooleanTestType;
uint32_t ui32SyncFlags;
uint32_t ui32NumOperands;
uint32_t ui32FirstSrc;
Operand asOperands[6];
uint32_t bSaturate;
uint32_t ui32FuncIndexWithinInterface;
RESINFO_RETURN_TYPE eResInfoReturnType;
int bAddressOffset;
int8_t iUAddrOffset;
int8_t iVAddrOffset;
int8_t iWAddrOffset;
RESOURCE_RETURN_TYPE xType, yType, zType, wType;
RESOURCE_DIMENSION eResDim;
int8_t iCausedSplit; // Nonzero if has caused a temp split. Later used by sampler datatype tweaking
struct Use
{
Use() : m_Inst(0), m_Op(0) {}
Use(const Use &a) : m_Inst(a.m_Inst), m_Op(a.m_Op) {}
Use(Instruction *inst, Operand *op) : m_Inst(inst), m_Op(op) {}
Instruction *m_Inst; // The instruction that references the result of this instruction
Operand *m_Op; // The operand within the instruction above. Note: can also be suboperand.
};
std::vector<Use> m_Uses; // Array of use sites for the result(s) of this instruction, if any of the results is a temp reg.
Instruction *m_LoopInductors[4]; // If OPCODE_LOOP and is suitable for transforming into for-loop, contains pointers to for initializer, end condition, breakc, and increment.
bool m_SkipTranslation; // If true, don't emit this instruction (currently used by the for loop translation)
uint32_t m_InductorRegister; // If non-zero, the inductor variable can be declared in the for statement, and this register number has been allocated for it
uint64_t id;
};

View File

@ -0,0 +1,9 @@
#pragma once
class ShaderPhase;

namespace HLSLcc
{
    // Entry point of the loop transformation pass; operates on the given phase in place
    // (the phase is taken by mutable reference). The implementation lives outside this header.
    void DoLoopTransform(ShaderPhase &phase);
}

View File

@ -0,0 +1,152 @@
#pragma once
#include "internal_includes/tokens.h"
#include <vector>
#include <memory>
#ifdef __APPLE__
#include <tr1/memory>
#endif
enum{ MAX_SUB_OPERANDS = 3 };
class Operand;
class HLSLCrossCompilerContext;
struct Instruction;
#if _MSC_VER
// We want to disable the "array will be default-initialized" warning, as that's exactly what we want
#pragma warning(disable: 4351)
#endif
// A single instruction operand.
// The default constructor value-initializes every member, so a freshly constructed
// Operand has zeroed scalars/arrays, an empty name, no sub-operands and no defines.
class Operand
{
public:
#ifdef __APPLE__
// Apple builds take shared_ptr from the TR1 namespace
// (presumably because the targeted standard library lacks std::shared_ptr -- confirm before changing).
typedef std::tr1::shared_ptr<Operand> SubOperandPtr;
#else
typedef std::shared_ptr<Operand> SubOperandPtr;
#endif
// Value-initializes all members; the initializer list follows the member declaration order.
Operand()
:
iExtended(),
eType(),
eModifier(),
eMinPrecision(),
iIndexDims(),
iWriteMask(),
iGSInput(),
iPSInOut(),
iWriteMaskEnabled(),
iArrayElements(),
iNumComponents(),
eSelMode(),
ui32CompMask(),
ui32Swizzle(),
aui32Swizzle(),
aui32ArraySizes(),
ui32RegisterNumber(),
afImmediates(),
adImmediates(),
eSpecialName(),
specialName(),
eIndexRep(),
m_SubOperands(),
aeDataType(),
m_Rebase(0),
m_Size(0),
m_Defines(),
m_ForLoopInductorName(0)
#ifdef _DEBUG
, id(0)
#endif
{}
// Retrieve the mask of all the components this operand accesses (either reads from or writes to).
// Note that destination writemask does affect the effective access mask.
uint32_t GetAccessMask() const;
// Returns the index of the highest accessed component, based on component mask
int GetMaxComponent() const;
// NOTE(review): presumably true when every swizzle lane selects the same component; definition is not in this header.
bool IsSwizzleReplicated() const;
// Get the number of elements returned by operand, taking additional component mask into account
//e.g.
//.z = 1
//.x = 1
//.yw = 2
uint32_t GetNumSwizzleElements(uint32_t ui32CompMask = OPERAND_4_COMPONENT_MASK_ALL) const;
// When this operand is used as an input declaration, how many components does it have?
int GetNumInputElements(const HLSLCrossCompilerContext *psContext) const;
// Retrieve the operand data type.
// ePreferredTypeForImmediates defaults to SVT_INT.
SHADER_VARIABLE_TYPE GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates = SVT_INT) const;
// Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch
int GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const;
// Same as above but with explicit shader type and phase
int GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const;
// Maps REFLECT_RESOURCE_PRECISION into OPERAND_MIN_PRECISION as much as possible
static OPERAND_MIN_PRECISION ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec);
int iExtended;
OPERAND_TYPE eType;
OPERAND_MODIFIER eModifier;
OPERAND_MIN_PRECISION eMinPrecision;
int iIndexDims;
int iWriteMask;
int iGSInput;
int iPSInOut;
int iWriteMaskEnabled;
int iArrayElements;
int iNumComponents;
OPERAND_4_COMPONENT_SELECTION_MODE eSelMode;
uint32_t ui32CompMask;
uint32_t ui32Swizzle;
uint32_t aui32Swizzle[4];
uint32_t aui32ArraySizes[3];
uint32_t ui32RegisterNumber;
//If eType is OPERAND_TYPE_IMMEDIATE32
float afImmediates[4];
//If eType is OPERAND_TYPE_IMMEDIATE64
double adImmediates[4];
SPECIAL_NAME eSpecialName;
std::string specialName;
OPERAND_INDEX_REPRESENTATION eIndexRep[3];
// Up to MAX_SUB_OPERANDS shared sub-operands; empty pointers after construction.
SubOperandPtr m_SubOperands[MAX_SUB_OPERANDS];
//One type for each component.
SHADER_VARIABLE_TYPE aeDataType[4];
uint32_t m_Rebase; // Rebase value, for constant array accesses.
uint32_t m_Size; // Component count, only for constant array access.
// One definition site: the instruction/operand pair that writes a temp this operand may read.
struct Define
{
Define() : m_Inst(0), m_Op(0) {}
Define(const Define &a) : m_Inst(a.m_Inst), m_Op(a.m_Op) {}
Define(Instruction *inst, Operand *op) : m_Inst(inst), m_Op(op) {}
Instruction *m_Inst; // Instruction that writes to the temp
Operand *m_Op; // The (destination) operand within that instruction.
};
std::vector<Define> m_Defines; // Array of instructions whose results this operand can use. (only if eType == OPERAND_TYPE_TEMP)
uint32_t m_ForLoopInductorName; // If non-zero, this (eType==OPERAND_TYPE_TEMP) is an inductor variable used in for loop, and it has a special number as given here (overrides ui32RegisterNumber)
#ifdef _DEBUG
// Debug-only identifier; note that its presence changes the class layout between debug and release builds.
uint64_t id;
#endif
};

View File

@ -0,0 +1,267 @@
#pragma once
#include <vector>
#include <string>
#include <map>
#include "growing_array.h"
#include "internal_includes/tokens.h"
#include "internal_includes/reflect.h"
#include "include/ShaderInfo.h"
#include "internal_includes/Instruction.h"
#include "internal_includes/Declaration.h"
#include "internal_includes/ControlFlowGraph.h"
#include "bstrlib.h"
// One chunk of a constant array together with the operands that access it.
// Chunks are stored in a ChunkMap keyed by their starting offset.
struct ConstantArrayChunk
{
    // Empty chunk: every scalar member is zero and there are no use sites.
    ConstantArrayChunk()
        : m_Size(0)
        , m_AccessMask(0)
        , m_Rebase(0)
        , m_ComponentCount(0)
        , m_UseSites()
    {
    }

    // Chunk of size `sz` accessed with component mask `mask`, with `firstUse` as its first use site.
    // m_Rebase and m_ComponentCount are zero-initialized here so that they never hold
    // indeterminate values; they are not derived from the arguments.
    ConstantArrayChunk(uint32_t sz, uint32_t mask, Operand *firstUse)
        : m_Size(sz)
        , m_AccessMask(mask)
        , m_Rebase(0)
        , m_ComponentCount(0)
        , m_UseSites()
    {
        m_UseSites.push_back(firstUse);
    }

    uint32_t m_Size;
    uint32_t m_AccessMask;
    uint32_t m_Rebase;         // NOTE(review): presumably mirrors Operand::m_Rebase (rebase value for constant array accesses) -- confirm
    uint32_t m_ComponentCount; // NOTE(review): presumably the number of components the chunk needs -- confirm
    std::vector<Operand *> m_UseSites; // Operands that access this chunk
};
// Chunks keyed by starting offset; a multimap because one offset may carry several chunks.
typedef std::multimap<uint32_t, ConstantArrayChunk> ChunkMap;

// Bookkeeping for a constant array: its original declaration plus the chunks found for it.
struct ConstantArrayInfo
{
    // No declaration attached and no chunks recorded.
    ConstantArrayInfo()
        : m_OrigDeclaration(nullptr)
        , m_Chunks()
    {
    }

    Declaration *m_OrigDeclaration; // Pointer to the original declaration of the const array
    ChunkMap m_Chunks;              // map of <starting offset, chunk info>, same start offset might have multiple entries for different access masks
};
class ShaderPhase
{
public:
ShaderPhase()
:
ePhase(MAIN_PHASE),
ui32InstanceCount(0),
postShaderCode(),
hasPostShaderCode(0),
earlyMain(),
ui32OrigTemps(0),
ui32TotalTemps(0),
psTempDeclaration(NULL),
pui32SplitInfo(),
peTempTypes(),
acInputNeedsRedirect(),
acOutputNeedsRedirect(),
acPatchConstantsNeedsRedirect(),
m_CFG(),
m_CFGInitialized(false),
m_NextFreeTempRegister(1),
m_NextTexCoordTemp(0)
{}
void ResolveUAVProperties();
void UnvectorizeImmMoves(); // Transform MOV tX.xyz, (0, 1, 2) into MOV tX.x, 0; MOV tX.y, 1; MOV tX.z, 2 to make datatype analysis easier
void PruneConstArrays(); // Walk through everything that accesses a const array to see if we could make it smaller
void ExpandSWAPCs(); // Expand all SWAPC opcodes into a bunch of MOVCs. Must be done first!
ConstantArrayInfo m_ConstantArrayInfo;
std::vector<Declaration> psDecl;
std::vector<Instruction> psInst;
SHADER_PHASE_TYPE ePhase;
uint32_t ui32InstanceCount; // In case of hull shaders, how many instances this phase needs to have. Defaults to 1.
bstring postShaderCode;//End of main or before emit()
int hasPostShaderCode;
bstring earlyMain;//Code to be inserted at the start of phase
uint32_t ui32OrigTemps; // The number of temporaries this phase originally declared
uint32_t ui32TotalTemps; // The number of temporaries this phase has now
Declaration *psTempDeclaration; // Shortcut to the OPCODE_DCL_TEMPS opcode
// The split table is a table containing the index of the original register this register was split out from, or 0xffffffff
// Format: lowest 16 bits: original register. bits 16-23: rebase (eg value of 1 means .yzw was changed to .xyz): bits 24-31: component count
std::vector<uint32_t> pui32SplitInfo;
std::vector<SHADER_VARIABLE_TYPE> peTempTypes;
// These are needed in cases we have 2 vec2 texcoords combined into one vec4 and they are accessed together.
std::vector<unsigned char> acInputNeedsRedirect; // If 0xff, requires re-routing all reads via a combined vec4. If 0xfe, the same but the vec4 has already been declared.
std::vector<unsigned char> acOutputNeedsRedirect; // Same for outputs
std::vector<unsigned char> acPatchConstantsNeedsRedirect; // Same for patch constants
// Get the Control Flow Graph for this phase, build it if necessary.
HLSLcc::ControlFlow::ControlFlowGraph &GetCFG();
uint32_t m_NextFreeTempRegister; // A counter for creating new temporaries for for-loops.
uint32_t m_NextTexCoordTemp; // A counter for creating tex coord temps for driver issue workarounds
private:
bool m_CFGInitialized;
HLSLcc::ControlFlow::ControlFlowGraph m_CFG;
};
// A whole decoded shader: version and type information, function tables, the list of phases,
// reflection data (sInfo) and per-register bookkeeping used during translation.
class Shader
{
public:
// Zero/empty-initializes the members listed below; aiOpcodeUsed gets NUM_OPCODES zeroed entries and
// aui32StructuredBufferBindingPoints gets MAX_RESOURCE_BINDINGS zeroed entries.
// The ps*TempSizes vectors are not listed and default-construct to empty.
Shader()
:
ui32MajorVersion(0),
ui32MinorVersion(0),
eShaderType(INVALID_SHADER),
eTargetLanguage(LANG_DEFAULT),
extensions(0),
fp64(0),
ui32ShaderLength(0),
aui32FuncTableToFuncPointer(),
aui32FuncBodyToFuncTable(),
funcTable(),
funcPointer(),
ui32NextClassFuncName(),
pui32FirstToken(NULL),
asPhases(),
sInfo(),
abScalarInput(),
abScalarOutput(),
aIndexedInput(),
aIndexedOutput(),
aIndexedInputParents(),
aeResourceDims(),
acInputDeclared(),
acOutputDeclared(),
aiOpcodeUsed(NUM_OPCODES, 0),
ui32CurrentVertexOutputStream(0),
textureSamplers(),
aui32StructuredBufferBindingPoints(MAX_RESOURCE_BINDINGS, 0),
ui32CurrentStructuredBufferIndex(),
m_CubemapArrayExtensionDeclared(false),
m_TextureBufferExtensionDeclared(false),
m_ClipDistanceExtensionDeclared(false)
{
}
// Retrieve the number of components the temp register has.
uint32_t GetTempComponentCount(SHADER_VARIABLE_TYPE eType, uint32_t ui32Reg) const;
//Hull shaders have multiple phases.
//Each phase has its own temps.
//Convert from per-phase temps to global temps.
void ConsolidateHullTempVars();
// Go through all declarations and remove UAV occupied binding points from the aui32StructuredBufferBindingPoints list
void ResolveStructuredBufferBindingSlots(ShaderPhase *psPhase);
// HLSL has separate register spaces for UAV and structured buffers. GLSL has shared register space for all buffers.
// The aim here is to preserve the UAV buffer bindings as they are and use remaining binding points for structured buffers.
// In this step make aui32StructuredBufferBindingPoints contain increasingly ordered uints starting from zero.
void PrepareStructuredBufferBindingSlots();
// Detect temp registers per data type that are actually used.
void PruneTempRegisters();
// Check if inputs and outputs are accessed across semantic boundaries
// as in, 2x texcoord vec2's are packed together as vec4 but still accessed together.
void AnalyzeIOOverlap();
// Change all references to vertex position to always be highp, having them be mediump causes problems on Metal and Vivante GPUs.
void ForcePositionToHighp();
void FindUnusedGlobals(uint32_t flags); // Finds the DCL_CONSTANT_BUFFER with name "$Globals" and searches through all usages for each member of it and mark if they're actually ever used.
// NOTE(review): ShaderPhase declares a method of the same name; presumably this one covers every phase -- confirm.
void ExpandSWAPCs();
uint32_t ui32MajorVersion;
uint32_t ui32MinorVersion;
SHADER_TYPE eShaderType;
GLLang eTargetLanguage;
// Non-owning pointer, null after construction.
const struct GlExtensions *extensions;
int fp64;
//DWORDs in program code, including version and length tokens.
uint32_t ui32ShaderLength;
//Instruction* functions;//non-main subroutines
HLSLcc::growing_vector<uint32_t> aui32FuncTableToFuncPointer; // dynamic alloc?
HLSLcc::growing_vector<uint32_t> aui32FuncBodyToFuncTable;
struct FuncTableEntry{
HLSLcc::growing_vector<uint32_t> aui32FuncBodies;
};
HLSLcc::growing_vector<FuncTableEntry> funcTable;
// Note: ui32NumBodiesPerTable has no initializer in this struct.
struct FuncPointerEntry {
HLSLcc::growing_vector<uint32_t> aui32FuncTables;
uint32_t ui32NumBodiesPerTable;
};
HLSLcc::growing_vector<FuncPointerEntry> funcPointer;
HLSLcc::growing_vector<uint32_t> ui32NextClassFuncName;
const uint32_t* pui32FirstToken;//Reference for calculating current position in token stream.
std::vector<ShaderPhase> asPhases;
ShaderInfo sInfo;
// There are 2 input/output register spaces in DX bytecode: one for per-patch data and one for per-vertex.
// Which one is used depends on the context:
// per-vertex space is used in vertex/pixel/geom shaders always
// hull shader control point phase uses per-vertex by default, other phases are per-patch by default (can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT)
// domain shader is per-patch by default, can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT
// Below, the [2] is accessed with 0 == per-vertex, 1 == per-patch
// Note that these ints are component masks
HLSLcc::growing_vector<int> abScalarInput[2];
HLSLcc::growing_vector<int> abScalarOutput[2];
HLSLcc::growing_vector<int> aIndexedInput[2];
HLSLcc::growing_vector<bool> aIndexedOutput[2];
HLSLcc::growing_vector<int> aIndexedInputParents[2];
HLSLcc::growing_vector<RESOURCE_DIMENSION> aeResourceDims;
HLSLcc::growing_vector<char> acInputDeclared[2];
HLSLcc::growing_vector<char> acOutputDeclared[2];
std::vector<int> aiOpcodeUsed; // Initialized to NUM_OPCODES elements above.
uint32_t ui32CurrentVertexOutputStream;
TextureSamplerPairs textureSamplers;
std::vector<uint32_t> aui32StructuredBufferBindingPoints;
uint32_t ui32CurrentStructuredBufferIndex;
bool m_CubemapArrayExtensionDeclared;
bool m_TextureBufferExtensionDeclared;
bool m_ClipDistanceExtensionDeclared;
std::vector<char> psIntTempSizes; // Array for whether this temp register needs declaration as int temp
std::vector<char> psInt16TempSizes; // min16ints
std::vector<char> psInt12TempSizes; // min12ints
std::vector<char> psUIntTempSizes; // Same for uints
std::vector<char> psUInt16TempSizes; // ... and for uint16's
std::vector<char> psFloatTempSizes; // ...and for floats
std::vector<char> psFloat16TempSizes; // ...and for min16floats
std::vector<char> psFloat10TempSizes; // ...and for min10floats
std::vector<char> psDoubleTempSizes; // ...and for doubles
std::vector<char> psBoolTempSizes; // ... and for bools
private:
// Per-operand helper; NOTE(review): presumably called from AnalyzeIOOverlap -- confirm.
void DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand);
};

View File

@ -0,0 +1,35 @@
#pragma once
#include "HLSLCrossCompilerContext.h"
#include "Shader.h"
struct Declaration;

// Base class for translator backend implementations.
// Holds the (non-owning) cross compiler context shared by every backend.
class Translator
{
public:
    explicit Translator(HLSLCrossCompilerContext *ctx)
        : psContext(ctx)
    {
    }

    virtual ~Translator()
    {
    }

    virtual bool Translate() = 0;

    virtual void TranslateDeclaration(const Declaration *psDecl) = 0;

    // Translate system value type to name, return true if succeeded and no further translation is necessary
    virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL) = 0;

    // In GLSL, the input and output names cannot clash.
    // Also, the output name of previous stage must match the input name of the next stage.
    // So, do gymnastics depending on which shader we're running on and which other shaders exist in this program.
    virtual void SetIOPrefixes() = 0;

    // Stores the extension table pointer on the shader owned by the context.
    void SetExtensions(const struct GlExtensions *ext)
    {
        psContext->psShader->extensions = ext;
    }

protected:
    HLSLCrossCompilerContext *psContext;
};

View File

@ -0,0 +1,141 @@
#pragma once
#include <set>
#include <map>
#include <list>
#include <vector>
#include <algorithm>
#include <stdint.h>
#include <string.h>
// Forward declarations: the two chain entry types refer to each other through the pointer sets below.
struct DefineUseChainEntry;
struct UseDefineChainEntry;
// Set of definitions (pointers to define-use entries).
typedef std::set<DefineUseChainEntry *> DefineSet;
// Set of usages (pointers to use-define entries).
typedef std::set<UseDefineChainEntry *> UsageSet;
// Types that are only used by pointer or reference in this header.
struct Instruction;
class Operand;
class ShaderInfo;
namespace HLSLcc
{
namespace ControlFlow
{
class ControlFlowGraph;
};
};
// Def-Use chain entry: one write to a single temp component, plus every read that depends on it.
struct DefineUseChainEntry
{
    // Starts with null pointers, no usages, zeroed masks/indices and no siblings.
    DefineUseChainEntry()
        : psInst(nullptr)
        , psOp(nullptr)
        , usages()
        , writeMask(0)
        , index(0)
        , isStandalone(0)
    {
        for (int slot = 0; slot < 4; ++slot)
        {
            psSiblings[slot] = nullptr;
        }
    }

    Instruction *psInst;   // The declaration (write to this temp component)
    Operand *psOp;         // The operand within this instruction for the write target
    UsageSet usages;       // List of usages that are dependent on this write
    uint32_t writeMask;    // Access mask; which all components were written to in the same op
    uint32_t index;        // For which component was this definition created for?
    uint32_t isStandalone; // A shortcut for analysis: if nonzero, all usages (and all their siblings) of this and all its siblings only see this definition
    struct DefineUseChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this define's corresponding entries for the other components.

#if _DEBUG
    // Debug-only comparison: scalar fields and pointers must match; the usage sets are only compared by size.
    bool operator==(const DefineUseChainEntry &a) const
    {
        return psInst == a.psInst
            && psOp == a.psOp
            && writeMask == a.writeMask
            && index == a.index
            && isStandalone == a.isStandalone
            && usages.size() == a.usages.size();
    }
#endif
};

typedef std::list<DefineUseChainEntry> DefineUseChain;
// Use-Def chain entry: one read of a single temp component, plus every write that can reach it.
struct UseDefineChainEntry
{
    // Starts with null pointers, no defines, zeroed mask/index and no siblings.
    UseDefineChainEntry()
        : psInst(nullptr)
        , psOp(nullptr)
        , defines()
        , accessMask(0)
        , index(0)
    {
        for (int slot = 0; slot < 4; ++slot)
        {
            psSiblings[slot] = nullptr;
        }
    }

    Instruction *psInst; // The use (read from this temp component)
    Operand *psOp;       // The operand within this instruction for the read
    DefineSet defines;   // List of writes that are visible to this read
    uint32_t accessMask; // Which all components were read together with this one
    uint32_t index;      // For which component was this usage created for?
    struct UseDefineChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this usage's corresponding entries for the other components.

#if _DEBUG
    // Debug-only comparison: scalar fields and pointers must match; the define sets are only compared by size.
    bool operator==(const UseDefineChainEntry &a) const
    {
        return psInst == a.psInst
            && psOp == a.psOp
            && accessMask == a.accessMask
            && index == a.index
            && defines.size() == a.defines.size();
    }
#endif
};
// Chain containers. The per-register maps are keyed by a uint32_t
// (NOTE(review): presumably the temp register number -- confirm against the implementation).
typedef std::list<UseDefineChainEntry> UseDefineChain;
typedef std::map<uint32_t, UseDefineChain> UseDefineChains;
typedef std::map<uint32_t, DefineUseChain> DefineUseChains;
typedef std::vector<DefineUseChainEntry *> ActiveDefinitions;
// Do flow control analysis on the instructions and build the define-use and use-define chains
void BuildUseDefineChains(std::vector<Instruction> &instructions, uint32_t ui32NumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, HLSLcc::ControlFlow::ControlFlowGraph &cfg);
// Do temp splitting based on use-define chains
void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable);
// Based on the sampler precisions, downgrade the definitions if possible.
void UpdateSamplerPrecisions(const ShaderInfo &psContext, DefineUseChains &psDUChains, uint32_t ui32NumTemps);
// Optimization pass for successive passes: Mark DefineUseChainEntry::isStandalone for definitions that are "standalone": all usages (and all their siblings) of this and all its siblings only see this definition.
void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32NumTemps);
// Write the uses and defines back to Instruction and Operand member lists.
void WriteBackUsesAndDefines(DefineUseChains &psDUChains);

View File

@ -0,0 +1,18 @@
#ifndef DEBUG_H_
#define DEBUG_H_
#ifdef _DEBUG
#include "assert.h"
// Debug builds: ASSERT(expr) trips assert(0) when expr is false.
// The expression is collapsed to a boolean with !! before it reaches CustomAssert's int
// parameter, so pointers compile and 64-bit or floating point values are not truncated
// (e.g. 0x100000000 or 0.5 used to convert to 0 and fire a bogus assertion).
#define ASSERT(expr) CustomAssert(!!(expr))
// Kept as a function so there is a single place to put a breakpoint.
// 'inline' avoids unused-function warnings in translation units that never assert.
static inline void CustomAssert(int expression)
{
    if (!expression)
    {
        assert(0);
    }
}
#else
// Release builds: ASSERT compiles away entirely, so expr is NOT evaluated.
#define ASSERT(expr)
#endif
#endif

View File

@ -0,0 +1,10 @@
#ifndef DECODE_H
#define DECODE_H
#include "internal_includes/Shader.h"
// Decodes the token stream at `data` into a Shader and returns a pointer to it.
// NOTE(review): ownership of the returned Shader and the meaning of decodeFlags are not visible from this header -- confirm with the implementation.
Shader* DecodeDXBC(uint32_t* data, uint32_t decodeFlags);
// Updates the operand reference bookkeeping on psShader for the given instruction and phase type.
// NOTE(review): exact fields touched are defined in the implementation, not here.
void UpdateOperandReferences(Shader* psShader, SHADER_PHASE_TYPE eShaderPhaseType, Instruction* psInst);
#endif

View File

@ -0,0 +1,249 @@
#ifndef LANGUAGES_H
#define LANGUAGES_H
#include "hlslcc.h"
// Returns 0 for LANG_ES_100 and LANG_120, 1 for every other language.
static int InOutSupported(const GLLang eLang)
{
    const bool isLegacy = (eLang == LANG_ES_100) || (eLang == LANG_120);
    return isLegacy ? 0 : 1;
}
// Returns 1 for LANG_ES_100 and LANG_120, 0 for every other language.
static int WriteToFragData(const GLLang eLang)
{
    const bool isLegacy = (eLang == LANG_ES_100) || (eLang == LANG_120);
    return isLegacy ? 1 : 0;
}
// Returns 1 for LANG_ES_300, LANG_ES_310 and any language value >= LANG_330; 0 otherwise.
static int ShaderBitEncodingSupported(const GLLang eLang)
{
    if (eLang == LANG_ES_300 || eLang == LANG_ES_310)
        return 1;
    return (eLang >= LANG_330) ? 1 : 0;
}
// Returns 0 for LANG_ES_100 and LANG_120, 1 for every other language.
static int HaveOverloadedTextureFuncs(const GLLang eLang)
{
    const bool isLegacy = (eLang == LANG_ES_100) || (eLang == LANG_120);
    return isLegacy ? 0 : 1;
}
// Only enabled for ES: returns 1 when eLang lies in [LANG_ES_100, LANG_ES_310], 0 otherwise.
// Not present in 120, ignored in other desktop languages.
static int HavePrecisionQualifers(const GLLang eLang)
{
    const bool inEsRange = (eLang >= LANG_ES_100) && (eLang <= LANG_ES_310);
    return inEsRange ? 1 : 0;
}
// Returns 1 when eLang lies in [LANG_400, LANG_GL_LAST], 0 otherwise.
static int HaveCubemapArray(const GLLang eLang)
{
    const bool inRange = (eLang >= LANG_400) && (eLang <= LANG_GL_LAST);
    return inRange ? 1 : 0;
}
// True when eLang lies in [LANG_ES_FIRST, LANG_ES_LAST].
static bool IsESLanguage(const GLLang eLang)
{
    if (eLang < LANG_ES_FIRST)
        return false;
    return eLang <= LANG_ES_LAST;
}
// True when eLang lies in [LANG_GL_FIRST, LANG_GL_LAST].
static bool IsDesktopGLLanguage(const GLLang eLang)
{
    if (eLang < LANG_GL_FIRST)
        return false;
    return eLang <= LANG_GL_LAST;
}
//Only on vertex inputs and pixel outputs.
static int HaveLimitedInOutLocationQualifier(const GLLang eLang, const struct GlExtensions *extensions)
{
if(eLang >= LANG_330 || eLang == LANG_ES_300 || eLang == LANG_ES_310 || (extensions && ((struct GlExtensions*)extensions)->ARB_explicit_attrib_location))
{
return 1;
}
return 0;
}
// Returns 1 for eLang >= LANG_410 or LANG_ES_310, 0 otherwise.
static int HaveInOutLocationQualifier(const GLLang eLang)
{
    return (eLang >= LANG_410 || eLang == LANG_ES_310) ? 1 : 0;
}
//layout(binding = X) uniform {uniformA; uniformB;}
//layout(location = X) uniform uniform_name;
static int HaveUniformBindingsAndLocations(const GLLang eLang,const struct GlExtensions *extensions, unsigned int flags)
{
if (flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS)
return 0;
if (eLang >= LANG_430 || eLang == LANG_ES_310 ||
(extensions && ((struct GlExtensions*)extensions)->ARB_explicit_uniform_location && ((struct GlExtensions*)extensions)->ARB_shading_language_420pack))
{
return 1;
}
return 0;
}
// Returns 1 when eLang >= LANG_330, 0 otherwise.
static int DualSourceBlendSupported(const GLLang eLang)
{
    return (eLang >= LANG_330) ? 1 : 0;
}
// Returns 1 when eLang >= LANG_400, 0 otherwise.
static int SubroutinesSupported(const GLLang eLang)
{
    return (eLang >= LANG_400) ? 1 : 0;
}
// Before 430, flat/smooth/centroid/noperspective must match
// between fragment and its previous stage.
// HLSL bytecode only tells us the interpolation in pixel shader.
// Returns 1 when eLang < LANG_430, 0 otherwise.
static int PixelInterpDependency(const GLLang eLang)
{
    return (eLang < LANG_430) ? 1 : 0;
}
// Returns 0 for LANG_ES_100 and LANG_120, 1 for every other language.
static int HaveUVec(const GLLang eLang)
{
    if (eLang == LANG_ES_100 || eLang == LANG_120)
        return 0;
    return 1;
}
// Returns 1 for eLang >= LANG_400 or LANG_ES_310, 0 otherwise.
static int HaveGather(const GLLang eLang)
{
    return (eLang >= LANG_400 || eLang == LANG_ES_310) ? 1 : 0;
}
// Returns 1 for eLang >= LANG_420 or LANG_ES_310, 0 otherwise.
static int HaveGatherNonConstOffset(const GLLang eLang)
{
    return (eLang >= LANG_420 || eLang == LANG_ES_310) ? 1 : 0;
}
// Returns 1 when eLang >= LANG_400, 0 otherwise.
static int HaveQueryLod(const GLLang eLang)
{
    return (eLang >= LANG_400) ? 1 : 0;
}
// Returns 1 when eLang >= LANG_430, 0 otherwise.
static int HaveQueryLevels(const GLLang eLang)
{
    return (eLang >= LANG_430) ? 1 : 0;
}
// Returns 1 when eLang >= LANG_150, 0 otherwise.
static int HaveFragmentCoordConventions(const GLLang eLang)
{
    return (eLang >= LANG_150) ? 1 : 0;
}
// Returns 1 when eLang >= LANG_150, 0 otherwise.
static int HaveGeometryShaderARB(const GLLang eLang)
{
    return (eLang >= LANG_150) ? 1 : 0;
}
// Returns 1 for eLang >= LANG_420 or LANG_ES_310, 0 otherwise.
static int HaveAtomicCounter(const GLLang eLang)
{
    return (eLang >= LANG_420 || eLang == LANG_ES_310) ? 1 : 0;
}
// Returns 1 for eLang >= LANG_430 or LANG_ES_310, 0 otherwise.
static int HaveAtomicMem(const GLLang eLang)
{
    return (eLang >= LANG_430 || eLang == LANG_ES_310) ? 1 : 0;
}
// Returns 1 when eLang >= LANG_420, 0 otherwise (no ES language is special-cased here).
static int HaveImageAtomics(const GLLang eLang)
{
    return (eLang >= LANG_420) ? 1 : 0;
}
// Returns 1 for eLang >= LANG_430 or LANG_ES_310, 0 otherwise.
static int HaveCompute(const GLLang eLang)
{
    return (eLang >= LANG_430 || eLang == LANG_ES_310) ? 1 : 0;
}
// Returns 1 for eLang >= LANG_420 or LANG_ES_310, 0 otherwise.
static int HaveImageLoadStore(const GLLang eLang)
{
    return (eLang >= LANG_420 || eLang == LANG_ES_310) ? 1 : 0;
}
#endif

View File

@ -0,0 +1,27 @@
#ifndef REFLECT_H
#define REFLECT_H
#include "hlslcc.h"
struct ShaderPhase_TAG;
// Pointers to the reflection chunks handed to LoadShaderInfo, one pointer per chunk kind.
// The struct does not own the memory it points to and has no constructor,
// so callers must initialize every pointer themselves.
struct ReflectionChunks
{
    uint32_t *pui32Inputs;
    uint32_t *pui32Outputs;
    uint32_t *pui32Resources;
    uint32_t *pui32Interfaces;
    uint32_t *pui32Inputs11;
    uint32_t *pui32Outputs11;
    uint32_t *pui32OutputsWithStreams;
    uint32_t *pui32PatchConstants;
    uint32_t *pui32PatchConstants11;
};
// Fills *psInfo from the reflection chunks in *psChunks for a shader of the given version.
// NOTE(review): which chunks may be null and the meaning of decodeFlags are defined by the implementation, not visible here.
void LoadShaderInfo(const uint32_t ui32MajorVersion,
const uint32_t ui32MinorVersion,
const ReflectionChunks* psChunks,
ShaderInfo* psInfo, uint32_t decodeFlags);
#endif

View File

@ -0,0 +1,107 @@
#pragma once
#include "hlslcc.h"
#include "internal_includes/Translator.h"
class HLSLCrossCompilerContext;
// GLSL translator backend. Implements the Translator interface and keeps the selected target language.
class ToGLSL : public Translator
{
protected:
    GLLang language;

public:
    explicit ToGLSL(HLSLCrossCompilerContext *ctx) : Translator(ctx), language(LANG_DEFAULT) {}

    // Sets the target language according to given input. if LANG_DEFAULT, does autodetect and returns the selected language
    GLLang SetLanguage(GLLang suggestedLanguage);

    // Translator interface. Marked 'override' so that a signature drift from the base class
    // becomes a compile error instead of silently declaring a new virtual.
    bool Translate() override;
    void TranslateDeclaration(const Declaration* psDecl) override;
    bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL) override;
    void SetIOPrefixes() override;

private:
    // --- Operand and instruction translation ---
    void TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL);
    void TranslateInstruction(Instruction* psInst, bool isEmbedded = false);

    void TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase);

    void TranslateOperandIndex(const Operand* psOperand, int index);
    void TranslateOperandIndexMAD(const Operand* psOperand, int index, uint32_t multiply, uint32_t add);

    // --- Assignment helpers ---
    void AddOpAssignToDestWithMask(const Operand* psDest,
        SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask);
    void AddAssignToDest(const Operand* psDest,
        SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis);
    void AddAssignPrologue(int numParenthesis, bool isEmbedded = false);

    // --- Declarations ---
    void AddBuiltinOutput(const Declaration* psDecl, int arrayElements, const char* builtinName);
    void HandleOutputRedirect(const Declaration *psDecl, const char *Precision);
    void HandleInputRedirect(const Declaration *psDecl, const char *Precision);

    void AddUserOutput(const Declaration* psDecl);
    void DeclareStructConstants(const uint32_t ui32BindingPoint,
        const ConstantBuffer* psCBuf, const Operand* psOperand,
        bstring glsl);

    // Comparison kinds accepted by AddComparison.
    typedef enum
    {
        CMP_EQ,
        CMP_LT,
        CMP_GE,
        CMP_NE,
    } ComparisonType;

    void AddComparison(Instruction* psInst, ComparisonType eType,
        uint32_t typeFlag);

    void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, bool isEmbedded = false);
    void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2);

    // --- Operator / helper call emitters; dest and srcN are integer operand indices ---
    void CallBinaryOp(const char* name, Instruction* psInst,
        int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType, bool isEmbedded = false);
    void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst,
        int dest, int src0, int src1, int src2, uint32_t dataType);
    void CallHelper3(const char* name, Instruction* psInst,
        int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask);
    void CallHelper2(const char* name, Instruction* psInst,
        int dest, int src0, int src1, int paramsShouldFollowWriteMask);
    void CallHelper2Int(const char* name, Instruction* psInst,
        int dest, int src0, int src1, int paramsShouldFollowWriteMask);
    void CallHelper2UInt(const char* name, Instruction* psInst,
        int dest, int src0, int src1, int paramsShouldFollowWriteMask);
    void CallHelper1(const char* name, Instruction* psInst,
        int dest, int src0, int paramsShouldFollowWriteMask);
    void CallHelper1Int(
        const char* name,
        Instruction* psInst,
        const int dest,
        const int src0,
        int paramsShouldFollowWriteMask);

    // --- Texture access ---
    void TranslateTexelFetch(
        Instruction* psInst,
        const ResourceBinding* psBinding,
        bstring glsl);
    void TranslateTexelFetchOffset(
        Instruction* psInst,
        const ResourceBinding* psBinding,
        bstring glsl);
    void TranslateTexCoord(
        const RESOURCE_DIMENSION eResDim,
        Operand* psTexCoordOperand);
    void GetResInfoData(Instruction* psInst, int index, int destElem);
    void TranslateTextureSample(Instruction* psInst,
        uint32_t ui32Flags);

    // --- Buffer / storage access ---
    void TranslateDynamicComponentSelection(const ShaderVarType* psVarType,
        const Operand* psByteAddr, uint32_t offset, uint32_t mask);
    void TranslateShaderStorageStore(Instruction* psInst);
    void TranslateShaderStorageLoad(Instruction* psInst);
    void TranslateAtomicMemOp(Instruction* psInst);

    void TranslateConditional(
        Instruction* psInst,
        bstring glsl);
};

View File

@ -0,0 +1,23 @@
#ifndef TO_GLSL_OPERAND_H
#define TO_GLSL_OPERAND_H
#include <stdint.h>
#include "bstrlib.h"
#include "ShaderInfo.h"
class HLSLCrossCompilerContext;
// Disabled free-function declarations, kept for reference (ToGLSL declares a TranslateOperand member).
//void TranslateOperand(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32TOFlag);
// Translate operand but add additional component mask
//void TranslateOperandWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask);
// Emits the swizzle of psOperand; iRebase is a component rebase value supplied by the caller.
void TranslateOperandSwizzle(HLSLCrossCompilerContext* psContext, const Operand* psOperand, int iRebase);
// Same as above, with an additional component mask.
void TranslateOperandSwizzleWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase);
// Resource name lookup by group and register number. This overload writes into targetStr.
void ResourceName(bstring targetStr, HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare);
// This overload returns the name as a std::string.
std::string ResourceName(HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare);
// Name for a (texture register, sampler register) pair, returned as a std::string.
std::string TextureSamplerName(ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare);
// NOTE(review): presumably appends the same pair name to str -- confirm with the implementation.
void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare);
#endif

View File

@ -0,0 +1,193 @@
#pragma once
#include "internal_includes/Translator.h"
#include <map>
#include <vector>
// Contents of one struct definition, kept as plain strings until it is printed.
struct StructDefinition
{
    // Starts out with no members, no dependencies and not yet printed
    // (both vectors default-construct to empty).
    StructDefinition()
        : m_IsPrinted(false)
    {
    }

    std::vector<std::string> m_Members;      // A vector of strings with the struct members
    std::vector<std::string> m_Dependencies; // A vector of struct names this struct depends on.
    bool m_IsPrinted;                        // Has this struct been printed out yet?
};

// Struct definitions keyed by a string (the struct name).
typedef std::map<std::string, StructDefinition> StructDefinitions;

// Map of extra function definitions we need to add before the shader body but after the declarations.
typedef std::map<std::string, std::string> FunctionDefinitions;
// A helper class for allocating binding slots
// (because both UAVs and textures use the same slots in Metal, also constant buffers and other buffers etc)
//
// Slots are handed out consecutively from a single counter shared by all bind types.
// Asking again for the same (register, type) pair returns the slot assigned the first time.
class BindingSlotAllocator
{
    // Key: bind type in the upper 32 bits, register number in the lower 32 bits.
    // A 64-bit key keeps distinct (register, type) pairs distinct for every uint32_t register
    // number; packing the type at bit 16 of a 32-bit key made e.g. (0x10000, ConstantBuffer)
    // collide with (0, RWBuffer).
    typedef std::map<uint64_t, uint32_t> SlotMap;
    SlotMap m_Allocations;

public:
    BindingSlotAllocator() : m_Allocations(), m_NextFreeSlot(0) {}

    enum BindType
    {
        ConstantBuffer = 0,
        RWBuffer,
        Texture,
        UAV
    };

    // Returns the slot for register `regNo` of kind `type`, allocating the next free slot
    // on first request.
    uint32_t GetBindingSlot(uint32_t regNo, BindType type)
    {
        const uint64_t key = (static_cast<uint64_t>(type) << 32) | regNo;

        // Single lookup: insert() leaves an existing entry untouched and reports whether it inserted.
        const std::pair<SlotMap::iterator, bool> result =
            m_Allocations.insert(std::make_pair(key, m_NextFreeSlot));
        if (result.second)
        {
            ++m_NextFreeSlot; // the slot was consumed by the new entry
        }
        return result.first->second;
    }

private:
    uint32_t m_NextFreeSlot;
};
// Translator backend that emits Metal Shading Language.
// This is a declaration only; the member functions are defined in other source files
// (the section comments below name ToMetalOperand.cpp and ToMetalInstruction.cpp).
class ToMetal : public Translator
{
protected:
// NOTE(review): not set in the constructor's initializer list below — confirm it is assigned before use.
GLLang language;
public:
// Forwards the context to the Translator base and starts with no shadow sampler declared.
explicit ToMetal(HLSLCrossCompilerContext *ctx) : Translator(ctx), m_ShadowSamplerDeclared(false) {}
virtual bool Translate();
virtual void TranslateDeclaration(const Declaration *psDecl);
virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL);
// ui32ComponentMask defaults to all four components.
std::string TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL);
virtual void SetIOPrefixes();
private:
void TranslateInstruction(Instruction* psInst);
void DeclareBuiltinInput(const Declaration *psDecl);
void DeclareBuiltinOutput(const Declaration *psDecl);
// Retrieve the name of the output struct for this shader
std::string GetOutputStructName() const;
// Retrieve the name of the input struct for this shader
std::string GetInputStructName() const;
void HandleInputRedirect(const Declaration *psDecl, const std::string &typeName);
void HandleOutputRedirect(const Declaration *psDecl, const std::string &typeName);
void DeclareConstantBuffer(const ConstantBuffer *psCBuf, uint32_t ui32BindingPoint);
// Struct type / struct member declaration helpers, overloaded for ShaderVar and ShaderVarType.
// Only the ShaderVar struct-type overload takes stripUnused.
void DeclareStructType(const std::string &name, const std::vector<ShaderVar> &contents, bool withinCB = false, uint32_t cumulativeOffset = 0, bool stripUnused = false);
void DeclareStructType(const std::string &name, const std::vector<ShaderVarType> &contents, bool withinCB = false, uint32_t cumulativeOffset = 0);
void DeclareStructVariable(const std::string &parentName, const ShaderVar &var, bool withinCB = false, uint32_t cumulativeOffset = 0);
void DeclareStructVariable(const std::string &parentName, const ShaderVarType &var, bool withinCB = false, uint32_t cumulativeOffset = 0);
void DeclareBufferVariable(const Declaration *psDecl, const bool isRaw, const bool isUAV);
void DeclareResource(const Declaration *psDecl);
void TranslateResourceTexture(const Declaration* psDecl, uint32_t samplerCanDoShadowCmp, HLSLCC_TEX_DIMENSION texDim);
void DeclareOutput(const Declaration *decl);
void PrintStructDeclarations(StructDefinitions &defs);
std::string ResourceName(ResourceGroup group, const uint32_t ui32RegisterNumber);
// ToMetalOperand.cpp
std::string TranslateOperandSwizzle(const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase, bool includeDot = true);
std::string TranslateOperandIndex(const Operand* psOperand, int index);
std::string TranslateVariableName(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase);
// ToMetalInstruction.cpp
void AddOpAssignToDestWithMask(const Operand* psDest,
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask);
void AddAssignToDest(const Operand* psDest,
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis);
void AddAssignPrologue(int numParenthesis);
// Comparison kinds accepted by AddComparison().
typedef enum
{
CMP_EQ,
CMP_LT,
CMP_GE,
CMP_NE,
} ComparisonType;
void AddComparison(Instruction* psInst, ComparisonType eType,
uint32_t typeFlag);
void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc);
void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2);
// In the Call* helpers below, dest/src0/src1/src2 are ints
// (presumably operand indices into psInst — TODO confirm in ToMetalInstruction.cpp).
void CallBinaryOp(const char* name, Instruction* psInst,
int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType);
void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst,
int dest, int src0, int src1, int src2, uint32_t dataType);
void CallHelper3(const char* name, Instruction* psInst,
int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask);
void CallHelper2(const char* name, Instruction* psInst,
int dest, int src0, int src1, int paramsShouldFollowWriteMask);
void CallHelper2Int(const char* name, Instruction* psInst,
int dest, int src0, int src1, int paramsShouldFollowWriteMask);
void CallHelper2UInt(const char* name, Instruction* psInst,
int dest, int src0, int src1, int paramsShouldFollowWriteMask);
void CallHelper1(const char* name, Instruction* psInst,
int dest, int src0, int paramsShouldFollowWriteMask);
void CallHelper1Int(
const char* name,
Instruction* psInst,
const int dest,
const int src0,
int paramsShouldFollowWriteMask);
// NOTE(review): the bstring parameter is named "glsl" even though this is the Metal backend.
void TranslateTexelFetch(
Instruction* psInst,
const ResourceBinding* psBinding,
bstring glsl);
void TranslateTexelFetchOffset(
Instruction* psInst,
const ResourceBinding* psBinding,
bstring glsl);
void TranslateTexCoord(
const RESOURCE_DIMENSION eResDim,
Operand* psTexCoordOperand);
void GetResInfoData(Instruction* psInst, int index, int destElem);
void TranslateTextureSample(Instruction* psInst,
uint32_t ui32Flags);
void TranslateDynamicComponentSelection(const ShaderVarType* psVarType,
const Operand* psByteAddr, uint32_t offset, uint32_t mask);
void TranslateShaderStorageStore(Instruction* psInst);
void TranslateShaderStorageLoad(Instruction* psInst);
void TranslateAtomicMemOp(Instruction* psInst);
void TranslateConditional(
Instruction* psInst,
bstring glsl);
// The map is keyed by struct name. The special name "" (empty string) is reserved for entry point function parameters
StructDefinitions m_StructDefinitions;
// A <function name, body text> map of extra helper functions we'll need.
FunctionDefinitions m_FunctionDefinitions;
// Two independent slot allocators. Presumably textures and UAVs share the first and
// constant/other buffers share the second, per the BindingSlotAllocator comment — confirm against the implementation.
BindingSlotAllocator m_TextureSlots;
BindingSlotAllocator m_BufferSlots;
std::string m_ExtraGlobalDefinitions;
// Initialised to false by the constructor.
bool m_ShadowSamplerDeclared;
void EnsureShadowSamplerDeclared();
// Add an extra function to the m_FunctionDefinitions list, unless it's already there.
void DeclareExtraFunction(const std::string &name, const std::string &body);
// Move all lowp -> mediump
void ClampPartialPrecisions();
};

View File

@ -0,0 +1,3 @@
#pragma once
#include "internal_includes/Declaration.h"

View File

@ -0,0 +1,783 @@
#ifndef TOKENS_H
#define TOKENS_H
#include "hlslcc.h"
// Identifies a phase of a shader program. MAIN_PHASE is 0; the HS_* values follow it
// (presumably the hull-shader sub-phases — confirm against the decoder that assigns them).
// SHADER_PHASE_INVALID (-1) is the sentinel for "no phase".
enum SHADER_PHASE_TYPE
{
SHADER_PHASE_INVALID = -1,
MAIN_PHASE = 0,
HS_GLOBAL_DECL_PHASE = 1,
HS_CTRL_POINT_PHASE = 2,
HS_FORK_PHASE = 3,
HS_JOIN_PHASE = 4
};
// Returns the upper 16 bits of the token, interpreted as a SHADER_TYPE.
static SHADER_TYPE DecodeShaderType(uint32_t ui32Token)
{
    const uint32_t typeBits = (ui32Token >> 16) & 0xffffu;
    return (SHADER_TYPE)typeBits;
}
// Returns bits [7:4] of the token: the program major version.
static uint32_t DecodeProgramMajorVersion(uint32_t ui32Token)
{
    const uint32_t majorBits = (ui32Token >> 4) & 0xfu;
    return majorBits;
}
// Returns bits [3:0] of the token: the program minor version.
static uint32_t DecodeProgramMinorVersion(uint32_t ui32Token)
{
    const uint32_t kMinorVersionMask = 0x0000000fu;
    return ui32Token & kMinorVersionMask;
}
// Returns bits [30:24] of the opcode token: the instruction length field.
static uint32_t DecodeInstructionLength(uint32_t ui32Token)
{
    const uint32_t lengthBits = (ui32Token >> 24) & 0x7fu;
    return lengthBits;
}
// Returns 1 when bit 31 (the "extended" flag) of the opcode token is set, otherwise 0.
static uint32_t DecodeIsOpcodeExtended(uint32_t ui32Token)
{
    const uint32_t extendedBit = ui32Token >> 31;
    return extendedBit;
}
// Kinds of extended opcode token.
typedef enum EXTENDED_OPCODE_TYPE
{
    EXTENDED_OPCODE_EMPTY = 0,
    EXTENDED_OPCODE_SAMPLE_CONTROLS = 1,
    EXTENDED_OPCODE_RESOURCE_DIM = 2,
    EXTENDED_OPCODE_RESOURCE_RETURN_TYPE = 3,
} EXTENDED_OPCODE_TYPE;

// Returns bits [5:0] of an extended opcode token as an EXTENDED_OPCODE_TYPE.
static EXTENDED_OPCODE_TYPE DecodeExtendedOpcodeType(uint32_t ui32Token)
{
    const uint32_t kTypeMask = 0x0000003fu;
    const uint32_t typeBits = ui32Token & kTypeMask;
    return (EXTENDED_OPCODE_TYPE)typeBits;
}
// Returns the 4-bit return type for coordinate ui32Coord; the fields are packed
// 4 bits apart starting at bit 0.
static RESOURCE_RETURN_TYPE DecodeResourceReturnType(uint32_t ui32Coord, uint32_t ui32Token)
{
    const uint32_t shift = ui32Coord * 4;
    const uint32_t typeBits = (ui32Token >> shift) & 0xF;
    return (RESOURCE_RETURN_TYPE)typeBits;
}
// Same as DecodeResourceReturnType, but for an extended opcode token where the
// 4-bit fields start at bit 6 instead of bit 0.
static RESOURCE_RETURN_TYPE DecodeExtendedResourceReturnType(uint32_t ui32Coord, uint32_t ui32Token)
{
    const uint32_t shift = ui32Coord * 4 + 6;
    const uint32_t typeBits = (ui32Token >> shift) & 0xF;
    return (RESOURCE_RETURN_TYPE)typeBits;
}
// Instruction opcodes.
// The negative values at the top are extra entries outside the 0-based numbering.
// From OPCODE_ADD onward the enumerators are numbered implicitly, starting at 0 because
// they follow OPCODE_SPECIAL_DCL_IMMCONST = -1, so their ORDER defines their values:
// do not insert or reorder entries.
// NUM_OPCODES is the count of 0-based opcodes; OPCODE_INVALID aliases it as a sentinel.
enum OPCODE_TYPE
{
//For DX9
OPCODE_POW = -6,
OPCODE_DP2ADD = -5,
OPCODE_LRP = -4,
OPCODE_ENDREP = -3,
OPCODE_REP = -2,
OPCODE_SPECIAL_DCL_IMMCONST = -1,
OPCODE_ADD,
OPCODE_AND,
OPCODE_BREAK,
OPCODE_BREAKC,
OPCODE_CALL,
OPCODE_CALLC,
OPCODE_CASE,
OPCODE_CONTINUE,
OPCODE_CONTINUEC,
OPCODE_CUT,
OPCODE_DEFAULT,
OPCODE_DERIV_RTX,
OPCODE_DERIV_RTY,
OPCODE_DISCARD,
OPCODE_DIV,
OPCODE_DP2,
OPCODE_DP3,
OPCODE_DP4,
OPCODE_ELSE,
OPCODE_EMIT,
OPCODE_EMITTHENCUT,
OPCODE_ENDIF,
OPCODE_ENDLOOP,
OPCODE_ENDSWITCH,
OPCODE_EQ,
OPCODE_EXP,
OPCODE_FRC,
OPCODE_FTOI,
OPCODE_FTOU,
OPCODE_GE,
OPCODE_IADD,
OPCODE_IF,
OPCODE_IEQ,
OPCODE_IGE,
OPCODE_ILT,
OPCODE_IMAD,
OPCODE_IMAX,
OPCODE_IMIN,
OPCODE_IMUL,
OPCODE_INE,
OPCODE_INEG,
OPCODE_ISHL,
OPCODE_ISHR,
OPCODE_ITOF,
OPCODE_LABEL,
OPCODE_LD,
OPCODE_LD_MS,
OPCODE_LOG,
OPCODE_LOOP,
OPCODE_LT,
OPCODE_MAD,
OPCODE_MIN,
OPCODE_MAX,
OPCODE_CUSTOMDATA,
OPCODE_MOV,
OPCODE_MOVC,
OPCODE_MUL,
OPCODE_NE,
OPCODE_NOP,
OPCODE_NOT,
OPCODE_OR,
OPCODE_RESINFO,
OPCODE_RET,
OPCODE_RETC,
OPCODE_ROUND_NE,
OPCODE_ROUND_NI,
OPCODE_ROUND_PI,
OPCODE_ROUND_Z,
OPCODE_RSQ,
OPCODE_SAMPLE,
OPCODE_SAMPLE_C,
OPCODE_SAMPLE_C_LZ,
OPCODE_SAMPLE_L,
OPCODE_SAMPLE_D,
OPCODE_SAMPLE_B,
OPCODE_SQRT,
OPCODE_SWITCH,
OPCODE_SINCOS,
OPCODE_UDIV,
OPCODE_ULT,
OPCODE_UGE,
OPCODE_UMUL,
OPCODE_UMAD,
OPCODE_UMAX,
OPCODE_UMIN,
OPCODE_USHR,
OPCODE_UTOF,
OPCODE_XOR,
OPCODE_DCL_RESOURCE, // DCL* opcodes have
OPCODE_DCL_CONSTANT_BUFFER, // custom operand formats.
OPCODE_DCL_SAMPLER,
OPCODE_DCL_INDEX_RANGE,
OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY,
OPCODE_DCL_GS_INPUT_PRIMITIVE,
OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT,
OPCODE_DCL_INPUT,
OPCODE_DCL_INPUT_SGV,
OPCODE_DCL_INPUT_SIV,
OPCODE_DCL_INPUT_PS,
OPCODE_DCL_INPUT_PS_SGV,
OPCODE_DCL_INPUT_PS_SIV,
OPCODE_DCL_OUTPUT,
OPCODE_DCL_OUTPUT_SGV,
OPCODE_DCL_OUTPUT_SIV,
OPCODE_DCL_TEMPS,
OPCODE_DCL_INDEXABLE_TEMP,
OPCODE_DCL_GLOBAL_FLAGS,
// -----------------------------------------------
OPCODE_RESERVED_10,
// ---------- DX 10.1 op codes---------------------
OPCODE_LOD,
OPCODE_GATHER4,
OPCODE_SAMPLE_POS,
OPCODE_SAMPLE_INFO,
// -----------------------------------------------
// This should be 10.1's version of NUM_OPCODES
OPCODE_RESERVED_10_1,
// ---------- DX 11 op codes---------------------
OPCODE_HS_DECLS, // token marks beginning of HS sub-shader
OPCODE_HS_CONTROL_POINT_PHASE, // token marks beginning of HS sub-shader
OPCODE_HS_FORK_PHASE, // token marks beginning of HS sub-shader
OPCODE_HS_JOIN_PHASE, // token marks beginning of HS sub-shader
OPCODE_EMIT_STREAM,
OPCODE_CUT_STREAM,
OPCODE_EMITTHENCUT_STREAM,
OPCODE_INTERFACE_CALL,
OPCODE_BUFINFO,
OPCODE_DERIV_RTX_COARSE,
OPCODE_DERIV_RTX_FINE,
OPCODE_DERIV_RTY_COARSE,
OPCODE_DERIV_RTY_FINE,
OPCODE_GATHER4_C,
OPCODE_GATHER4_PO,
OPCODE_GATHER4_PO_C,
OPCODE_RCP,
OPCODE_F32TOF16,
OPCODE_F16TOF32,
OPCODE_UADDC,
OPCODE_USUBB,
OPCODE_COUNTBITS,
OPCODE_FIRSTBIT_HI,
OPCODE_FIRSTBIT_LO,
OPCODE_FIRSTBIT_SHI,
OPCODE_UBFE,
OPCODE_IBFE,
OPCODE_BFI,
OPCODE_BFREV,
OPCODE_SWAPC,
OPCODE_DCL_STREAM,
OPCODE_DCL_FUNCTION_BODY,
OPCODE_DCL_FUNCTION_TABLE,
OPCODE_DCL_INTERFACE,
OPCODE_DCL_INPUT_CONTROL_POINT_COUNT,
OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT,
OPCODE_DCL_TESS_DOMAIN,
OPCODE_DCL_TESS_PARTITIONING,
OPCODE_DCL_TESS_OUTPUT_PRIMITIVE,
OPCODE_DCL_HS_MAX_TESSFACTOR,
OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT,
OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT,
OPCODE_DCL_THREAD_GROUP,
OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED,
OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW,
OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED,
OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW,
OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED,
OPCODE_DCL_RESOURCE_RAW,
OPCODE_DCL_RESOURCE_STRUCTURED,
OPCODE_LD_UAV_TYPED,
OPCODE_STORE_UAV_TYPED,
OPCODE_LD_RAW,
OPCODE_STORE_RAW,
OPCODE_LD_STRUCTURED,
OPCODE_STORE_STRUCTURED,
OPCODE_ATOMIC_AND,
OPCODE_ATOMIC_OR,
OPCODE_ATOMIC_XOR,
OPCODE_ATOMIC_CMP_STORE,
OPCODE_ATOMIC_IADD,
OPCODE_ATOMIC_IMAX,
OPCODE_ATOMIC_IMIN,
OPCODE_ATOMIC_UMAX,
OPCODE_ATOMIC_UMIN,
OPCODE_IMM_ATOMIC_ALLOC,
OPCODE_IMM_ATOMIC_CONSUME,
OPCODE_IMM_ATOMIC_IADD,
OPCODE_IMM_ATOMIC_AND,
OPCODE_IMM_ATOMIC_OR,
OPCODE_IMM_ATOMIC_XOR,
OPCODE_IMM_ATOMIC_EXCH,
OPCODE_IMM_ATOMIC_CMP_EXCH,
OPCODE_IMM_ATOMIC_IMAX,
OPCODE_IMM_ATOMIC_IMIN,
OPCODE_IMM_ATOMIC_UMAX,
OPCODE_IMM_ATOMIC_UMIN,
OPCODE_SYNC,
OPCODE_DADD,
OPCODE_DMAX,
OPCODE_DMIN,
OPCODE_DMUL,
OPCODE_DEQ,
OPCODE_DGE,
OPCODE_DLT,
OPCODE_DNE,
OPCODE_DMOV,
OPCODE_DMOVC,
OPCODE_DTOF,
OPCODE_FTOD,
OPCODE_EVAL_SNAPPED,
OPCODE_EVAL_SAMPLE_INDEX,
OPCODE_EVAL_CENTROID,
OPCODE_DCL_GS_INSTANCE_COUNT,
OPCODE_ABORT,
OPCODE_DEBUG_BREAK,
// -----------------------------------------------
// This marks the end of D3D11.0 opcodes
OPCODE_RESERVED_11,
OPCODE_DDIV,
OPCODE_DFMA,
OPCODE_DRCP,
OPCODE_MSAD,
OPCODE_DTOI,
OPCODE_DTOU,
OPCODE_ITOD,
OPCODE_UTOD,
// -----------------------------------------------
// This marks the end of D3D11.1 opcodes
OPCODE_RESERVED_11_1,
NUM_OPCODES,
OPCODE_INVALID = NUM_OPCODES,
};
// Returns bits [10:0] of the opcode token as an OPCODE_TYPE.
static OPCODE_TYPE DecodeOpcodeType(uint32_t ui32Token)
{
    const uint32_t kOpcodeMask = 0x000007ffu;
    const uint32_t opcodeBits = ui32Token & kOpcodeMask;
    return (OPCODE_TYPE)opcodeBits;
}
// Number of index dimensions an operand carries (0 to 3).
typedef enum
{
    INDEX_0D,
    INDEX_1D,
    INDEX_2D,
    INDEX_3D,
} OPERAND_INDEX_DIMENSION;

// Returns bits [21:20] of the operand token as an OPERAND_INDEX_DIMENSION.
static OPERAND_INDEX_DIMENSION DecodeOperandIndexDimension(uint32_t ui32Token)
{
    const uint32_t dimensionBits = (ui32Token >> 20) & 0x3u;
    return (OPERAND_INDEX_DIMENSION)dimensionBits;
}
// Operand register-file / kind identifiers.
// Values 0..40 are the ones DecodeOperandType() can return (it extracts an unsigned 8-bit field).
// The negative OPERAND_TYPE_SPECIAL_* values therefore cannot come out of that decoder;
// they are presumably assigned by other code paths — confirm against their users.
typedef enum OPERAND_TYPE
{
OPERAND_TYPE_SPECIAL_LOOPCOUNTER = -10,
OPERAND_TYPE_SPECIAL_IMMCONSTINT = -9,
OPERAND_TYPE_SPECIAL_TEXCOORD = -8,
OPERAND_TYPE_SPECIAL_POSITION = -7,
OPERAND_TYPE_SPECIAL_FOG = -6,
OPERAND_TYPE_SPECIAL_POINTSIZE = -5,
OPERAND_TYPE_SPECIAL_OUTOFFSETCOLOUR = -4,
OPERAND_TYPE_SPECIAL_OUTBASECOLOUR = -3,
OPERAND_TYPE_SPECIAL_ADDRESS = -2,
OPERAND_TYPE_SPECIAL_IMMCONST = -1,
OPERAND_TYPE_TEMP = 0, // Temporary Register File
OPERAND_TYPE_INPUT = 1, // General Input Register File
OPERAND_TYPE_OUTPUT = 2, // General Output Register File
OPERAND_TYPE_INDEXABLE_TEMP = 3, // Temporary Register File (indexable)
OPERAND_TYPE_IMMEDIATE32 = 4, // 32bit/component immediate value(s)
// If for example, operand token bits
// [01:00]==OPERAND_4_COMPONENT,
// this means that the operand type:
// OPERAND_TYPE_IMMEDIATE32
// results in 4 additional 32bit
// DWORDS present for the operand.
OPERAND_TYPE_IMMEDIATE64 = 5, // 64bit/comp.imm.val(s)HI:LO
OPERAND_TYPE_SAMPLER = 6, // Reference to sampler state
OPERAND_TYPE_RESOURCE = 7, // Reference to memory resource (e.g. texture)
OPERAND_TYPE_CONSTANT_BUFFER= 8, // Reference to constant buffer
OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER= 9, // Reference to immediate constant buffer
OPERAND_TYPE_LABEL = 10, // Label
OPERAND_TYPE_INPUT_PRIMITIVEID = 11, // Input primitive ID
OPERAND_TYPE_OUTPUT_DEPTH = 12, // Output Depth
OPERAND_TYPE_NULL = 13, // Null register, used to discard results of operations
// Below Are operands new in DX 10.1
OPERAND_TYPE_RASTERIZER = 14, // DX10.1 Rasterizer register, used to denote the depth/stencil and render target resources
OPERAND_TYPE_OUTPUT_COVERAGE_MASK = 15, // DX10.1 PS output MSAA coverage mask (scalar)
// Below Are operands new in DX 11
OPERAND_TYPE_STREAM = 16, // Reference to GS stream output resource
OPERAND_TYPE_FUNCTION_BODY = 17, // Reference to a function definition
OPERAND_TYPE_FUNCTION_TABLE = 18, // Reference to a set of functions used by a class
OPERAND_TYPE_INTERFACE = 19, // Reference to an interface
OPERAND_TYPE_FUNCTION_INPUT = 20, // Reference to an input parameter to a function
OPERAND_TYPE_FUNCTION_OUTPUT = 21, // Reference to an output parameter to a function
OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID = 22, // HS Control Point phase input saying which output control point ID this is
OPERAND_TYPE_INPUT_FORK_INSTANCE_ID = 23, // HS Fork Phase input instance ID
OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID = 24, // HS Join Phase input instance ID
OPERAND_TYPE_INPUT_CONTROL_POINT = 25, // HS Fork+Join, DS phase input control points (array of them)
OPERAND_TYPE_OUTPUT_CONTROL_POINT = 26, // HS Fork+Join phase output control points (array of them)
OPERAND_TYPE_INPUT_PATCH_CONSTANT = 27, // DS+HSJoin Input Patch Constants (array of them)
OPERAND_TYPE_INPUT_DOMAIN_POINT = 28, // DS Input Domain point
OPERAND_TYPE_THIS_POINTER = 29, // Reference to an interface this pointer
OPERAND_TYPE_UNORDERED_ACCESS_VIEW = 30, // Reference to UAV u#
OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY = 31, // Reference to Thread Group Shared Memory g#
OPERAND_TYPE_INPUT_THREAD_ID = 32, // Compute Shader Thread ID
OPERAND_TYPE_INPUT_THREAD_GROUP_ID = 33, // Compute Shader Thread Group ID
OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP = 34, // Compute Shader Thread ID In Thread Group
OPERAND_TYPE_INPUT_COVERAGE_MASK = 35, // Pixel shader coverage mask input
OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED = 36, // Compute Shader Thread ID In Group Flattened to a 1D value.
OPERAND_TYPE_INPUT_GS_INSTANCE_ID = 37, // Input GS instance ID
OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL = 38, // Output Depth, forced to be greater than or equal than current depth
OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL = 39, // Output Depth, forced to be less than or equal to current depth
OPERAND_TYPE_CYCLE_COUNTER = 40, // Cycle counter
} OPERAND_TYPE;
// Returns bits [19:12] of the operand token as an OPERAND_TYPE.
static OPERAND_TYPE DecodeOperandType(uint32_t ui32Token)
{
    const uint32_t typeBits = (ui32Token >> 12) & 0xffu;
    return (OPERAND_TYPE)typeBits;
}
// Returns the low 16 bits of the token as a SPECIAL_NAME.
static SPECIAL_NAME DecodeOperandSpecialName(uint32_t ui32Token)
{
    const uint32_t kNameMask = 0x0000ffffu;
    const uint32_t nameBits = ui32Token & kNameMask;
    return (SPECIAL_NAME)nameBits;
}
// How one operand index is encoded in the token stream.
typedef enum OPERAND_INDEX_REPRESENTATION
{
    OPERAND_INDEX_IMMEDIATE32 = 0,               // Extra DWORD
    OPERAND_INDEX_IMMEDIATE64 = 1,               // 2 Extra DWORDs
                                                 // (HI32:LO32)
    OPERAND_INDEX_RELATIVE = 2,                  // Extra operand
    OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE = 3, // Extra DWORD followed by
                                                 // extra operand
    OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE = 4, // 2 Extra DWORDS
                                                 // (HI32:LO32) followed
                                                 // by extra operand
} OPERAND_INDEX_REPRESENTATION;

// Returns the index representation for index dimension ui32Dimension of an operand token.
//
// The per-dimension fields are 3 bits wide and packed from bit 22 upward:
// dimension 0 is [24:22], dimension 1 is [27:25], dimension 2 is [30:28].
// All three bits are extracted so that OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE (4)
// decodes correctly; a 2-bit mask would turn it into OPERAND_INDEX_IMMEDIATE32.
// Shifting the token down before masking also avoids left-shifting a signed
// constant into the sign bit when (ui32Dimension & 3) == 3.
static OPERAND_INDEX_REPRESENTATION DecodeOperandIndexRepresentation(uint32_t ui32Dimension, uint32_t ui32Token)
{
    const uint32_t shift = 22 + 3 * (ui32Dimension & 3);
    const uint32_t representationBits = (ui32Token >> shift) & 0x7u;
    return (OPERAND_INDEX_REPRESENTATION)representationBits;
}
// Component-count classes an operand can declare.
typedef enum OPERAND_NUM_COMPONENTS
{
    OPERAND_0_COMPONENT = 0,
    OPERAND_1_COMPONENT = 1,
    OPERAND_4_COMPONENT = 2,
    OPERAND_N_COMPONENT = 3 // unused for now
} OPERAND_NUM_COMPONENTS;

// Returns bits [1:0] of the operand token as an OPERAND_NUM_COMPONENTS.
static OPERAND_NUM_COMPONENTS DecodeOperandNumComponents(uint32_t ui32Token)
{
    const uint32_t kNumComponentsMask = 0x00000003u;
    const uint32_t countBits = ui32Token & kNumComponentsMask;
    return (OPERAND_NUM_COMPONENTS)countBits;
}
// How a 4-component operand picks its components.
typedef enum OPERAND_4_COMPONENT_SELECTION_MODE
{
    OPERAND_4_COMPONENT_MASK_MODE = 0,     // mask 4 components
    OPERAND_4_COMPONENT_SWIZZLE_MODE = 1,  // swizzle 4 components
    OPERAND_4_COMPONENT_SELECT_1_MODE = 2, // select 1 of 4 components
} OPERAND_4_COMPONENT_SELECTION_MODE;

// Returns bits [3:2] of the operand token as the component selection mode.
static OPERAND_4_COMPONENT_SELECTION_MODE DecodeOperand4CompSelMode(uint32_t ui32Token)
{
    const uint32_t modeBits = (ui32Token >> 2) & 0x3u;
    return (OPERAND_4_COMPONENT_SELECTION_MODE)modeBits;
}
// One bit per component of a 4-component write mask, after decoding.
// The R/G/B/A names are aliases of X/Y/Z/W.
#define OPERAND_4_COMPONENT_MASK_X 0x00000001
#define OPERAND_4_COMPONENT_MASK_Y 0x00000002
#define OPERAND_4_COMPONENT_MASK_Z 0x00000004
#define OPERAND_4_COMPONENT_MASK_W 0x00000008
#define OPERAND_4_COMPONENT_MASK_R OPERAND_4_COMPONENT_MASK_X
#define OPERAND_4_COMPONENT_MASK_G OPERAND_4_COMPONENT_MASK_Y
#define OPERAND_4_COMPONENT_MASK_B OPERAND_4_COMPONENT_MASK_Z
#define OPERAND_4_COMPONENT_MASK_A OPERAND_4_COMPONENT_MASK_W
#define OPERAND_4_COMPONENT_MASK_ALL 0x0000000f

// Returns bits [7:4] of the operand token: the component mask, expressed with the
// OPERAND_4_COMPONENT_MASK_* bits above.
static uint32_t DecodeOperand4CompMask(uint32_t ui32Token)
{
    const uint32_t maskBits = (ui32Token >> 4) & 0xfu;
    return maskBits;
}
// Returns bits [11:4] of the operand token: the packed swizzle, 2 bits per component.
static uint32_t DecodeOperand4CompSwizzle(uint32_t ui32Token)
{
    const uint32_t swizzleBits = (ui32Token >> 4) & 0xffu;
    return swizzleBits;
}
// Returns bits [5:4] of the operand token: the index (0..3) of the single selected component.
static uint32_t DecodeOperand4CompSel1(uint32_t ui32Token)
{
    const uint32_t selectedComponent = (ui32Token >> 4) & 0x3u;
    return selectedComponent;
}
// Component indices used inside a packed swizzle.
#define OPERAND_4_COMPONENT_X 0
#define OPERAND_4_COMPONENT_Y 1
#define OPERAND_4_COMPONENT_Z 2
#define OPERAND_4_COMPONENT_W 3

// Packed swizzles: 2 bits per destination component, component 0 in the lowest bits.
// The values are NOT pre-shifted by 4 into operand-token position.
// NO_SWIZZLE is the identity (xyzw).
static const uint32_t NO_SWIZZLE =
    (OPERAND_4_COMPONENT_X << 0) |
    (OPERAND_4_COMPONENT_Y << 2) |
    (OPERAND_4_COMPONENT_Z << 4) |
    (OPERAND_4_COMPONENT_W << 6);

// Broadcast swizzles: multiplying a 2-bit index by 0x55 (binary 01010101) copies it into all four slots.
static const uint32_t XXXX_SWIZZLE = OPERAND_4_COMPONENT_X * 0x55;
static const uint32_t YYYY_SWIZZLE = OPERAND_4_COMPONENT_Y * 0x55;
static const uint32_t ZZZZ_SWIZZLE = OPERAND_4_COMPONENT_Z * 0x55;
static const uint32_t WWWW_SWIZZLE = OPERAND_4_COMPONENT_W * 0x55;
// Returns the source component index (0..3) that the swizzle in ui32Token assigns to
// destination component `comp`. Only the low two bits of `comp` are used.
static uint32_t DecodeOperand4CompSwizzleSource(uint32_t ui32Token, uint32_t comp)
{
    const uint32_t shift = 4 + 2 * (comp & 3);
    const uint32_t sourceComponent = (ui32Token >> shift) & 3;
    return sourceComponent;
}
// Resource dimensionality (buffer, texture flavours, raw/structured buffer).
typedef enum RESOURCE_DIMENSION
{
    RESOURCE_DIMENSION_UNKNOWN = 0,
    RESOURCE_DIMENSION_BUFFER = 1,
    RESOURCE_DIMENSION_TEXTURE1D = 2,
    RESOURCE_DIMENSION_TEXTURE2D = 3,
    RESOURCE_DIMENSION_TEXTURE2DMS = 4,
    RESOURCE_DIMENSION_TEXTURE3D = 5,
    RESOURCE_DIMENSION_TEXTURECUBE = 6,
    RESOURCE_DIMENSION_TEXTURE1DARRAY = 7,
    RESOURCE_DIMENSION_TEXTURE2DARRAY = 8,
    RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 9,
    RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10,
    RESOURCE_DIMENSION_RAW_BUFFER = 11,
    RESOURCE_DIMENSION_STRUCTURED_BUFFER = 12,
} RESOURCE_DIMENSION;

// Returns bits [15:11] of the token as a RESOURCE_DIMENSION.
static RESOURCE_DIMENSION DecodeResourceDimension(uint32_t ui32Token)
{
    const uint32_t dimensionBits = (ui32Token >> 11) & 0x1fu;
    return (RESOURCE_DIMENSION)dimensionBits;
}
// Returns bits [10:6] of an extended opcode token as a RESOURCE_DIMENSION.
static RESOURCE_DIMENSION DecodeExtendedResourceDimension(uint32_t ui32Token)
{
    const uint32_t dimensionBits = (ui32Token >> 6) & 0x1fu;
    return (RESOURCE_DIMENSION)dimensionBits;
}
// Whether a conditional instruction tests for zero or for non-zero.
typedef enum INSTRUCTION_TEST_BOOLEAN
{
    INSTRUCTION_TEST_ZERO = 0,
    INSTRUCTION_TEST_NONZERO = 1
} INSTRUCTION_TEST_BOOLEAN;

// Returns bit 18 of the opcode token as an INSTRUCTION_TEST_BOOLEAN.
static INSTRUCTION_TEST_BOOLEAN DecodeInstrTestBool(uint32_t ui32Token)
{
    const uint32_t testBit = (ui32Token >> 18) & 0x1u;
    return (INSTRUCTION_TEST_BOOLEAN)testBit;
}
// Returns 1 when bit 31 (the "extended" flag) of the operand token is set, otherwise 0.
static uint32_t DecodeIsOperandExtended(uint32_t ui32Token)
{
    const uint32_t extendedBit = ui32Token >> 31;
    return extendedBit;
}
// Kinds of extended operand token.
typedef enum EXTENDED_OPERAND_TYPE
{
    EXTENDED_OPERAND_EMPTY = 0,
    EXTENDED_OPERAND_MODIFIER = 1,
} EXTENDED_OPERAND_TYPE;

// Returns bits [5:0] of an extended operand token as an EXTENDED_OPERAND_TYPE.
static EXTENDED_OPERAND_TYPE DecodeExtendedOperandType(uint32_t ui32Token)
{
    const uint32_t kTypeMask = 0x0000003fu;
    const uint32_t typeBits = ui32Token & kTypeMask;
    return (EXTENDED_OPERAND_TYPE)typeBits;
}
// Source operand modifiers (negate and/or absolute value).
typedef enum OPERAND_MODIFIER
{
    OPERAND_MODIFIER_NONE = 0,
    OPERAND_MODIFIER_NEG = 1,
    OPERAND_MODIFIER_ABS = 2,
    OPERAND_MODIFIER_ABSNEG = 3,
} OPERAND_MODIFIER;

// Returns bits [13:6] of an extended operand token as an OPERAND_MODIFIER.
static OPERAND_MODIFIER DecodeExtendedOperandModifier(uint32_t ui32Token)
{
    const uint32_t modifierBits = (ui32Token >> 6) & 0xffu;
    return (OPERAND_MODIFIER)modifierBits;
}
// Global shader flags, expressed at their in-token bit positions (bits 11..18).
static const uint32_t GLOBAL_FLAG_REFACTORING_ALLOWED               = 0x00000800;
static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS = 0x00001000;
static const uint32_t GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL         = 0x00002000;
static const uint32_t GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS = 0x00004000;
static const uint32_t GLOBAL_FLAG_SKIP_OPTIMIZATION                 = 0x00008000;
static const uint32_t GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION          = 0x00010000;
static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS          = 0x00020000;
static const uint32_t GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS          = 0x00040000;

// Returns bits [23:11] of the token unshifted, so the result can be tested
// directly against the GLOBAL_FLAG_* constants.
static uint32_t DecodeGlobalFlags(uint32_t ui32Token)
{
    const uint32_t kGlobalFlagsMask = 0x00fff800u;
    return ui32Token & kGlobalFlagsMask;
}
// Returns bits [14:11] of the token as an INTERPOLATION_MODE.
static INTERPOLATION_MODE DecodeInterpolationMode(uint32_t ui32Token)
{
    const uint32_t modeBits = (ui32Token >> 11) & 0xfu;
    return (INTERPOLATION_MODE)modeBits;
}
// Primitive topologies a geometry shader can output.
typedef enum PRIMITIVE_TOPOLOGY
{
    PRIMITIVE_TOPOLOGY_UNDEFINED = 0,
    PRIMITIVE_TOPOLOGY_POINTLIST = 1,
    PRIMITIVE_TOPOLOGY_LINELIST = 2,
    PRIMITIVE_TOPOLOGY_LINESTRIP = 3,
    PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4,
    PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5,
    // 6 is reserved for legacy triangle fans
    // Each adjacency value equals (0x8 | non-adjacency value), e.g. LINELIST 2 -> LINELIST_ADJ 10:
    PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10,
    PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11,
    PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12,
    PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13,
} PRIMITIVE_TOPOLOGY;

// Returns bits [16:11] of the token as a PRIMITIVE_TOPOLOGY.
static PRIMITIVE_TOPOLOGY DecodeGSOutputPrimitiveTopology(uint32_t ui32Token)
{
    const uint32_t topologyBits = (ui32Token >> 11) & 0x3fu;
    return (PRIMITIVE_TOPOLOGY)topologyBits;
}
// Input primitive kinds. Values 8..39 are the N-control-point patches:
// PRIMITIVE_<N>_CONTROL_POINT_PATCH has the value N + 7.
typedef enum PRIMITIVE
{
PRIMITIVE_UNDEFINED = 0,
PRIMITIVE_POINT = 1,
PRIMITIVE_LINE = 2,
PRIMITIVE_TRIANGLE = 3,
// Each adjacency value equals (0x4 | non-adjacency value), e.g. LINE 2 -> LINE_ADJ 6:
PRIMITIVE_LINE_ADJ = 6,
PRIMITIVE_TRIANGLE_ADJ = 7,
PRIMITIVE_1_CONTROL_POINT_PATCH = 8,
PRIMITIVE_2_CONTROL_POINT_PATCH = 9,
PRIMITIVE_3_CONTROL_POINT_PATCH = 10,
PRIMITIVE_4_CONTROL_POINT_PATCH = 11,
PRIMITIVE_5_CONTROL_POINT_PATCH = 12,
PRIMITIVE_6_CONTROL_POINT_PATCH = 13,
PRIMITIVE_7_CONTROL_POINT_PATCH = 14,
PRIMITIVE_8_CONTROL_POINT_PATCH = 15,
PRIMITIVE_9_CONTROL_POINT_PATCH = 16,
PRIMITIVE_10_CONTROL_POINT_PATCH = 17,
PRIMITIVE_11_CONTROL_POINT_PATCH = 18,
PRIMITIVE_12_CONTROL_POINT_PATCH = 19,
PRIMITIVE_13_CONTROL_POINT_PATCH = 20,
PRIMITIVE_14_CONTROL_POINT_PATCH = 21,
PRIMITIVE_15_CONTROL_POINT_PATCH = 22,
PRIMITIVE_16_CONTROL_POINT_PATCH = 23,
PRIMITIVE_17_CONTROL_POINT_PATCH = 24,
PRIMITIVE_18_CONTROL_POINT_PATCH = 25,
PRIMITIVE_19_CONTROL_POINT_PATCH = 26,
PRIMITIVE_20_CONTROL_POINT_PATCH = 27,
PRIMITIVE_21_CONTROL_POINT_PATCH = 28,
PRIMITIVE_22_CONTROL_POINT_PATCH = 29,
PRIMITIVE_23_CONTROL_POINT_PATCH = 30,
PRIMITIVE_24_CONTROL_POINT_PATCH = 31,
PRIMITIVE_25_CONTROL_POINT_PATCH = 32,
PRIMITIVE_26_CONTROL_POINT_PATCH = 33,
PRIMITIVE_27_CONTROL_POINT_PATCH = 34,
PRIMITIVE_28_CONTROL_POINT_PATCH = 35,
PRIMITIVE_29_CONTROL_POINT_PATCH = 36,
PRIMITIVE_30_CONTROL_POINT_PATCH = 37,
PRIMITIVE_31_CONTROL_POINT_PATCH = 38,
PRIMITIVE_32_CONTROL_POINT_PATCH = 39,
} PRIMITIVE;
// Returns bits [16:11] of the token as a PRIMITIVE.
static PRIMITIVE DecodeGSInputPrimitive(uint32_t ui32Token)
{
    const uint32_t primitiveBits = (ui32Token >> 11) & 0x3fu;
    return (PRIMITIVE)primitiveBits;
}
// Returns bits [13:11] of the token as a TESSELLATOR_PARTITIONING.
static TESSELLATOR_PARTITIONING DecodeTessPartitioning(uint32_t ui32Token)
{
    const uint32_t partitioningBits = (ui32Token >> 11) & 0x7u;
    return (TESSELLATOR_PARTITIONING)partitioningBits;
}
// Tessellator domain shapes.
typedef enum TESSELLATOR_DOMAIN
{
    TESSELLATOR_DOMAIN_UNDEFINED = 0,
    TESSELLATOR_DOMAIN_ISOLINE = 1,
    TESSELLATOR_DOMAIN_TRI = 2,
    TESSELLATOR_DOMAIN_QUAD = 3
} TESSELLATOR_DOMAIN;

// Returns bits [12:11] of the token as a TESSELLATOR_DOMAIN.
static TESSELLATOR_DOMAIN DecodeTessDomain(uint32_t ui32Token)
{
    const uint32_t domainBits = (ui32Token >> 11) & 0x3u;
    return (TESSELLATOR_DOMAIN)domainBits;
}
// Returns bits [13:11] of the token as a TESSELLATOR_OUTPUT_PRIMITIVE.
static TESSELLATOR_OUTPUT_PRIMITIVE DecodeTessOutPrim(uint32_t ui32Token)
{
    const uint32_t primitiveBits = (ui32Token >> 11) & 0x7u;
    return (TESSELLATOR_OUTPUT_PRIMITIVE)primitiveBits;
}
// Sync instruction flags, expressed at their in-token bit positions (bits 11..14).
static const uint32_t SYNC_THREADS_IN_GROUP                    = (1u << 11);
static const uint32_t SYNC_THREAD_GROUP_SHARED_MEMORY          = (1u << 12);
static const uint32_t SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP  = (1u << 13);
static const uint32_t SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL = (1u << 14);

// Returns bits [14:11] of the token unshifted, so the result can be tested
// directly against the SYNC_* constants.
static uint32_t DecodeSyncFlags(uint32_t ui32Token)
{
    const uint32_t kSyncFlagsMask = 0x00007800u;
    return ui32Token & kSyncFlagsMask;
}
// The number of types that implement this interface: the low 16 bits of the token.
static uint32_t DecodeInterfaceTableLength(uint32_t ui32Token)
{
    const uint32_t kTableLengthMask = 0x0000ffffu;
    return ui32Token & kTableLengthMask;
}
// The number of interfaces that are defined in this array: the high 16 bits of the token.
static uint32_t DecodeInterfaceArrayLength(uint32_t ui32Token)
{
    const uint32_t arrayLength = (ui32Token >> 16) & 0xffffu;
    return arrayLength;
}
// Classes of custom-data block that can appear in the token stream.
typedef enum CUSTOMDATA_CLASS
{
    CUSTOMDATA_COMMENT = 0,
    CUSTOMDATA_DEBUGINFO,
    CUSTOMDATA_OPAQUE,
    CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER,
    CUSTOMDATA_SHADER_MESSAGE,
} CUSTOMDATA_CLASS;

// Returns bits [31:11] of the token as a CUSTOMDATA_CLASS.
static CUSTOMDATA_CLASS DecodeCustomDataClass(uint32_t ui32Token)
{
    // Everything above bit 10 belongs to the field, so a plain shift extracts it.
    const uint32_t classBits = ui32Token >> 11;
    return (CUSTOMDATA_CLASS)classBits;
}
// Returns 1 when bit 13 (the saturate flag) of the opcode token is set, otherwise 0.
static uint32_t DecodeInstructionSaturate(uint32_t ui32Token)
{
    const uint32_t saturateBit = (ui32Token >> 13) & 0x1u;
    return saturateBit;
}
// Minimum-precision hints an operand can carry. The value 3 is not assigned.
typedef enum OPERAND_MIN_PRECISION
{
    OPERAND_MIN_PRECISION_DEFAULT = 0,   // Default precision
                                         // for the shader model
    OPERAND_MIN_PRECISION_FLOAT_16 = 1,  // Min 16 bit/component float
    OPERAND_MIN_PRECISION_FLOAT_2_8 = 2, // Min 10(2.8)bit/comp. float
    OPERAND_MIN_PRECISION_SINT_16 = 4,   // Min 16 bit/comp. signed integer
    OPERAND_MIN_PRECISION_UINT_16 = 5,   // Min 16 bit/comp. unsigned integer
} OPERAND_MIN_PRECISION;

// Returns bits [16:14] of the token. The result is a raw uint32_t; compare it
// against the OPERAND_MIN_PRECISION values.
static uint32_t DecodeOperandMinPrecision(uint32_t ui32Token)
{
    const uint32_t precisionBits = (ui32Token >> 14) & 0x7u;
    return precisionBits;
}
// Returns bits [16:11] of the token: the output control point count.
static uint32_t DecodeOutputControlPointCount(uint32_t ui32Token)
{
    const uint32_t controlPointCount = (ui32Token >> 11) & 0x3fu;
    return controlPointCount;
}
// Texture coordinate axes that can carry an immediate address offset.
typedef enum IMMEDIATE_ADDRESS_OFFSET_COORD
{
    IMMEDIATE_ADDRESS_OFFSET_U = 0,
    IMMEDIATE_ADDRESS_OFFSET_V = 1,
    IMMEDIATE_ADDRESS_OFFSET_W = 2,
} IMMEDIATE_ADDRESS_OFFSET_COORD;

// Each coordinate's offset is a 4-bit field; the U field starts at bit 9 and
// V/W follow 4 bits apart.
#define IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord) (9+4*((Coord)&3))
#define IMMEDIATE_ADDRESS_OFFSET_MASK(Coord) (0x0000000f<<IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord))

// Returns the raw 4-bit offset field for eCoord. The value is returned unsigned,
// with no sign extension applied here.
static uint32_t DecodeImmediateAddressOffset(IMMEDIATE_ADDRESS_OFFSET_COORD eCoord, uint32_t ui32Token)
{
    const uint32_t fieldBits = ui32Token & IMMEDIATE_ADDRESS_OFFSET_MASK(eCoord);
    return fieldBits >> IMMEDIATE_ADDRESS_OFFSET_SHIFT(eCoord);
}
// UAV access scope flags
static const uint32_t GLOBALLY_COHERENT_ACCESS = (1u << 16);

// Returns bit 16 of the token unshifted: either 0 or GLOBALLY_COHERENT_ACCESS.
static uint32_t DecodeAccessCoherencyFlags(uint32_t ui32Token)
{
    const uint32_t kCoherencyMask = 0x00010000u;
    return ui32Token & kCoherencyMask;
}
// Return-value formats of the resinfo instruction.
typedef enum RESINFO_RETURN_TYPE
{
    RESINFO_INSTRUCTION_RETURN_FLOAT = 0,
    RESINFO_INSTRUCTION_RETURN_RCPFLOAT = 1,
    RESINFO_INSTRUCTION_RETURN_UINT = 2
} RESINFO_RETURN_TYPE;

// Returns bits [12:11] of the opcode token as a RESINFO_RETURN_TYPE.
static RESINFO_RETURN_TYPE DecodeResInfoReturnType(uint32_t ui32Token)
{
    const uint32_t returnTypeBits = (ui32Token >> 11) & 0x3u;
    return (RESINFO_RETURN_TYPE)returnTypeBits;
}
#endif

600
src/reflect.cpp Normal file
View File

@ -0,0 +1,600 @@
#include "internal_includes/reflect.h"
#include "internal_includes/debug.h"
#include "internal_includes/decode.h"
#include "bstrlib.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
// Rewrites a reflection variable name in place so it can be used as a GLSL identifier.
//
// MSDN (http://msdn.microsoft.com/en-us/library/windows/desktop/bb944006(v=vs.85).aspx):
// uniform function parameters appear in the constant table prepended with a dollar
// sign ($), unlike global variables, to avoid name collisions between local uniform
// inputs and global variables of the same name.
//
// "$ThisPointer", "$Element" and "$Globals" are left untouched. Any other name that
// starts with '$' gets that character replaced by '_', because '$' is not a valid
// character in GLSL variable names. Names not starting with '$' are unchanged.
static void FormatVariableName(std::string & Name)
{
    if (Name[0] != '$')
        return;

    const char* const reservedNames[] = { "$Element", "$Globals", "$ThisPointer" };
    const char* const current = Name.c_str();
    for (unsigned int idx = 0; idx < sizeof(reservedNames) / sizeof(reservedNames[0]); ++idx)
    {
        if (strcmp(current, reservedNames[idx]) == 0)
            return;
    }

    Name[0] = '_';
}
// Reinterprets the memory at `tokens` as a NUL-terminated C string and returns a copy.
// The caller must guarantee that a terminating NUL byte is present.
static std::string ReadStringFromTokenStream(const uint32_t* tokens)
{
    return std::string(reinterpret_cast<const char*>(tokens));
}
// Returns the index of the lowest set bit in `mask`, i.e. how many components the
// mask must be shifted down so that its first used component lands at position 0.
// An empty mask has no set bit; it returns 0 instead of spinning forever
// (shifting zero right never produces a set low bit).
static int MaskToRebaseOffset(const uint32_t mask)
{
    if (mask == 0)
        return 0;

    int res = 0;
    for (uint32_t m = mask; (m & 1) == 0; m >>= 1)
        ++res;
    return res;
}
static void ReadInputSignatures(const uint32_t* pui32Tokens,
ShaderInfo* psShaderInfo,
const int extended)
{
uint32_t i;
const uint32_t* pui32FirstSignatureToken = pui32Tokens;
const uint32_t ui32ElementCount = *pui32Tokens++;
/* const uint32_t ui32Key = * */ pui32Tokens++;
psShaderInfo->psInputSignatures.clear();
psShaderInfo->psInputSignatures.resize(ui32ElementCount);
for(i=0; i<ui32ElementCount; ++i)
{
uint32_t ui32ComponentMasks;
ShaderInfo::InOutSignature* psCurrentSignature = &psShaderInfo->psInputSignatures[i];
uint32_t ui32SemanticNameOffset;
psCurrentSignature->ui32Stream = 0;
psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT;
if(extended)
psCurrentSignature->ui32Stream = *pui32Tokens++;
ui32SemanticNameOffset = *pui32Tokens++;
psCurrentSignature->ui32SemanticIndex = *pui32Tokens++;
psCurrentSignature->eSystemValueType = (SPECIAL_NAME) *pui32Tokens++;
psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE) *pui32Tokens++;
psCurrentSignature->ui32Register = *pui32Tokens++;
ui32ComponentMasks = *pui32Tokens++;
psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F;
//Shows which components are read
psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8;
psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask);
if(extended)
psCurrentSignature->eMinPrec = (MIN_PRECISION) *pui32Tokens++;
psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken+ui32SemanticNameOffset));
}
}
static void ReadOutputSignatures(const uint32_t* pui32Tokens,
ShaderInfo* psShaderInfo,
const int minPrec,
const int streams)
{
uint32_t i;
const uint32_t* pui32FirstSignatureToken = pui32Tokens;
const uint32_t ui32ElementCount = *pui32Tokens++;
/*const uint32_t ui32Key = * */ pui32Tokens++;
psShaderInfo->psOutputSignatures.clear();
psShaderInfo->psOutputSignatures.resize(ui32ElementCount);
for(i=0; i<ui32ElementCount; ++i)
{
uint32_t ui32ComponentMasks;
ShaderInfo::InOutSignature* psCurrentSignature = &psShaderInfo->psOutputSignatures[i];
uint32_t ui32SemanticNameOffset;
psCurrentSignature->ui32Stream = 0;
psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT;
if(streams)
psCurrentSignature->ui32Stream = *pui32Tokens++;
ui32SemanticNameOffset = *pui32Tokens++;
psCurrentSignature->ui32SemanticIndex = *pui32Tokens++;
psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++;
psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE) *pui32Tokens++;
psCurrentSignature->ui32Register = *pui32Tokens++;
// Massage some special inputs/outputs to match the types of GLSL counterparts
if (psCurrentSignature->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX)
{
psCurrentSignature->eComponentType = INOUT_COMPONENT_SINT32;
}
ui32ComponentMasks = *pui32Tokens++;
psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F;
//Shows which components are NEVER written.
psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8;
psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask);
if(minPrec)
psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++;
psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset));
}
}
static void ReadPatchConstantSignatures(const uint32_t* pui32Tokens,
ShaderInfo* psShaderInfo,
const int minPrec,
const int streams)
{
uint32_t i;
const uint32_t* pui32FirstSignatureToken = pui32Tokens;
const uint32_t ui32ElementCount = *pui32Tokens++;
/*const uint32_t ui32Key = * */ pui32Tokens++;
psShaderInfo->psPatchConstantSignatures.clear();
psShaderInfo->psPatchConstantSignatures.resize(ui32ElementCount);
for(i=0; i<ui32ElementCount; ++i)
{
uint32_t ui32ComponentMasks;
ShaderInfo::InOutSignature* psCurrentSignature = &psShaderInfo->psPatchConstantSignatures[i];
uint32_t ui32SemanticNameOffset;
psCurrentSignature->ui32Stream = 0;
psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT;
if(streams)
psCurrentSignature->ui32Stream = *pui32Tokens++;
ui32SemanticNameOffset = *pui32Tokens++;
psCurrentSignature->ui32SemanticIndex = *pui32Tokens++;
psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++;
psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE) *pui32Tokens++;
psCurrentSignature->ui32Register = *pui32Tokens++;
// Massage some special inputs/outputs to match the types of GLSL counterparts
if (psCurrentSignature->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX)
{
psCurrentSignature->eComponentType = INOUT_COMPONENT_SINT32;
}
ui32ComponentMasks = *pui32Tokens++;
psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F;
//Shows which components are NEVER written.
psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8;
psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask);
if(minPrec)
psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++;
psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset));
}
}
// Removes `suffix` from the end of `name` when `name` ends with it.
// Returns true if the suffix was found and stripped.
static bool StripTrailingSuffix(std::string& name, const std::string& suffix)
{
    // Guard against names shorter than the suffix: without this check the
    // start position below would wrap around.
    if (name.length() < suffix.length())
        return false;

    const std::string::size_type start = name.length() - suffix.length();
    if (name.compare(start, suffix.length(), suffix) != 0)
        return false;

    name.resize(start);
    return true;
}

// Reads one resource binding record.
//
// pui32FirstResourceToken - start of the resource chunk; the name offset is a
//                           byte offset relative to this address.
// pui32Tokens             - first token of the record to read.
// psBinding               - receives the parsed binding.
// decodeFlags             - when HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME is
//                           set, a trailing "_highp" / "_mediump" / "_lowp" is
//                           stripped from the name and stored as ePrecision.
//
// Returns the token following the record.
static const uint32_t* ReadResourceBinding(const uint32_t* pui32FirstResourceToken, const uint32_t* pui32Tokens, ResourceBinding* psBinding, uint32_t decodeFlags)
{
    const uint32_t ui32NameOffset = *pui32Tokens++;

    psBinding->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstResourceToken + ui32NameOffset));
    FormatVariableName(psBinding->name);

    psBinding->eType = (ResourceType)*pui32Tokens++;
    psBinding->ui32ReturnType = (RESOURCE_RETURN_TYPE)*pui32Tokens++;
    psBinding->eDimension = (REFLECT_RESOURCE_DIMENSION)*pui32Tokens++;
    psBinding->ui32NumSamples = *pui32Tokens++;
    psBinding->ui32BindPoint = *pui32Tokens++;
    psBinding->ui32BindCount = *pui32Tokens++;
    psBinding->ui32Flags = *pui32Tokens++;

    psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_UNKNOWN;
    if (decodeFlags & HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME)
    {
        // The previous form, rfind(suffix) == length() - N, wrapped to npos for a
        // name exactly N-1 characters long, which "matched" a failed rfind() and
        // then called resize(npos). The helper checks the length first.
        if (StripTrailingSuffix(psBinding->name, "_highp"))
            psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_HIGHP;
        else if (StripTrailingSuffix(psBinding->name, "_mediump"))
            psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_MEDIUMP;
        else if (StripTrailingSuffix(psBinding->name, "_lowp"))
            psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_LOWP;
    }

    return pui32Tokens;
}
//Read D3D11_SHADER_TYPE_DESC
static void ReadShaderVariableType(const uint32_t ui32MajorVersion,
const uint32_t* pui32FirstConstBufToken,
const uint32_t* pui32tokens, ShaderVarType* varType)
{
const uint16_t* pui16Tokens = (const uint16_t*) pui32tokens;
uint16_t ui32MemberCount;
uint32_t ui32MemberOffset;
const uint32_t* pui32MemberTokens;
uint32_t i;
varType->Class = (SHADER_VARIABLE_CLASS)pui16Tokens[0];
varType->Type = (SHADER_VARIABLE_TYPE)pui16Tokens[1];
varType->Rows = pui16Tokens[2];
varType->Columns = pui16Tokens[3];
varType->Elements = pui16Tokens[4];
varType->MemberCount = ui32MemberCount = pui16Tokens[5];
varType->Members.clear();
if(varType->ParentCount)
{
// Add empty brackets for array parents. Indices are filled in later in the printing codes.
if (varType->Parent->Elements > 1)
varType->fullName = varType->Parent->fullName + "[]." + varType->name;
else
varType->fullName = varType->Parent->fullName + "." + varType->name;
}
if(ui32MemberCount)
{
varType->Members.resize(ui32MemberCount);
ui32MemberOffset = pui32tokens[3];
pui32MemberTokens = (const uint32_t*)((const char*)pui32FirstConstBufToken+ui32MemberOffset);
for(i=0; i< ui32MemberCount; ++i)
{
uint32_t ui32NameOffset = *pui32MemberTokens++;
uint32_t ui32MemberTypeOffset = *pui32MemberTokens++;
varType->Members[i].Parent = varType;
varType->Members[i].ParentCount = varType->ParentCount + 1;
varType->Members[i].Offset = *pui32MemberTokens++;
varType->Members[i].name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstConstBufToken + ui32NameOffset));
ReadShaderVariableType(ui32MajorVersion, pui32FirstConstBufToken,
(const uint32_t*)((const char*)pui32FirstConstBufToken+ui32MemberTypeOffset), &varType->Members[i]);
}
}
}
// Reads one constant buffer record and all of its variables.
//
// psShaderInfo            - supplies ui32MajorVersion, which selects the
//                           per-variable record size.
// pui32FirstConstBufToken - base address for every byte offset in the record.
// pui32Tokens             - first token of the buffer record.
// psBuffer                - receives the name, variables and total size.
//
// Returns the token that follows the buffer record.
static const uint32_t* ReadConstantBuffer(ShaderInfo* psShaderInfo,
    const uint32_t* pui32FirstConstBufToken, const uint32_t* pui32Tokens, ConstantBuffer* psBuffer)
{
    const char* const base = reinterpret_cast<const char*>(pui32FirstConstBufToken);

    const uint32_t bufferNameOffset = *pui32Tokens++;
    const uint32_t varCount = *pui32Tokens++;
    const uint32_t varTableOffset = *pui32Tokens++;

    psBuffer->name = ReadStringFromTokenStream(reinterpret_cast<const uint32_t*>(base + bufferNameOffset));
    FormatVariableName(psBuffer->name);

    psBuffer->asVars.clear();
    psBuffer->asVars.resize(varCount);

    const uint32_t* varCursor = reinterpret_cast<const uint32_t*>(base + varTableOffset);
    for (uint32_t v = 0; v < varCount; ++v)
    {
        //D3D11_SHADER_VARIABLE_DESC
        ShaderVar& var = psBuffer->asVars[v];

        const uint32_t varNameOffset = *varCursor++;
        var.name = ReadStringFromTokenStream(reinterpret_cast<const uint32_t*>(base + varNameOffset));
        FormatVariableName(var.name);

        var.ui32StartOffset = *varCursor++;
        var.ui32Size = *varCursor++;
        ++varCursor; // flags token, not used
        const uint32_t typeOffset = *varCursor++;

        // Top-level variable: no parent, and the full name is just the name.
        var.sType.name = var.name;
        var.sType.fullName = var.name;
        var.sType.Parent = 0;
        var.sType.ParentCount = 0;
        var.sType.Offset = 0;
        var.sType.m_IsUsed = false;

        ReadShaderVariableType(psShaderInfo->ui32MajorVersion, pui32FirstConstBufToken,
                               reinterpret_cast<const uint32_t*>(base + typeOffset), &var.sType);

        const uint32_t defaultValueOffset = *varCursor++;

        if (psShaderInfo->ui32MajorVersion >= 5)
        {
            // Four extra tokens that are not needed here:
            // StartTexture, TextureSize, StartSampler, SamplerSize.
            varCursor += 4;
        }

        var.haveDefaultValue = 0;
        if (defaultValueOffset)
        {
            //Always a sequence of 4-bytes at the moment.
            //bool const becomes 0 or 0xFFFFFFFF int, int & float are 4-bytes.
            ASSERT(var.ui32Size % 4 == 0);

            const uint32_t* const defaults = reinterpret_cast<const uint32_t*>(base + defaultValueOffset);
            var.haveDefaultValue = 1;
            var.pui32DefaultValues.assign(defaults, defaults + var.ui32Size / 4);
        }
    }

    psBuffer->ui32TotalSizeInBytes = *pui32Tokens++;
    pui32Tokens += 2; // flags and buffer type tokens, not used

    return pui32Tokens;
}
static void ReadResources(const uint32_t* pui32Tokens,//in
ShaderInfo* psShaderInfo, //out
uint32_t decodeFlags)
{
ResourceBinding* psResBindings;
ConstantBuffer* psConstantBuffers;
const uint32_t* pui32ConstantBuffers;
const uint32_t* pui32ResourceBindings;
const uint32_t* pui32FirstToken = pui32Tokens;
uint32_t i;
const uint32_t ui32NumConstantBuffers = *pui32Tokens++;
const uint32_t ui32ConstantBufferOffset = *pui32Tokens++;
uint32_t ui32NumResourceBindings = *pui32Tokens++;
uint32_t ui32ResourceBindingOffset = *pui32Tokens++;
/*uint32_t ui32ShaderModel = * */ pui32Tokens++;
/*uint32_t ui32CompileFlags = * */ pui32Tokens++;//D3DCompile flags? http://msdn.microsoft.com/en-us/library/gg615083(v=vs.85).aspx
//Resources
pui32ResourceBindings = (const uint32_t*)((const char*)pui32FirstToken + ui32ResourceBindingOffset);
psShaderInfo->psResourceBindings.clear();
psShaderInfo->psResourceBindings.resize(ui32NumResourceBindings);
psResBindings = &psShaderInfo->psResourceBindings[0];
for(i=0; i < ui32NumResourceBindings; ++i)
{
pui32ResourceBindings = ReadResourceBinding(pui32FirstToken, pui32ResourceBindings, psResBindings+i, decodeFlags);
ASSERT(psResBindings[i].ui32BindPoint < MAX_RESOURCE_BINDINGS);
}
//Constant buffers
pui32ConstantBuffers = (const uint32_t*)((const char*)pui32FirstToken + ui32ConstantBufferOffset);
psShaderInfo->psConstantBuffers.clear();
psShaderInfo->psConstantBuffers.resize(ui32NumConstantBuffers);
psConstantBuffers = &psShaderInfo->psConstantBuffers[0];
for(i=0; i < ui32NumConstantBuffers; ++i)
{
pui32ConstantBuffers = ReadConstantBuffer(psShaderInfo, pui32FirstToken, pui32ConstantBuffers, psConstantBuffers+i);
}
//Map resource bindings to constant buffers
if(psShaderInfo->psConstantBuffers.size())
{
for(i=0; i < ui32NumResourceBindings; ++i)
{
ResourceGroup eRGroup;
uint32_t cbufIndex = 0;
eRGroup = ShaderInfo::ResourceTypeToResourceGroup(psResBindings[i].eType);
//Find the constant buffer whose name matches the resource at the given resource binding point
for(cbufIndex=0; cbufIndex < psShaderInfo->psConstantBuffers.size(); cbufIndex++)
{
if(psConstantBuffers[cbufIndex].name == psResBindings[i].name)
{
psShaderInfo->aui32ResourceMap[eRGroup][psResBindings[i].ui32BindPoint] = cbufIndex;
}
}
}
}
}
// Reads one class type record: a 32-bit name offset followed by four 16-bit
// fields (ID, constant buffer stride, texture, sampler).
//
// pui32FirstInterfaceToken - base address for the name's byte offset.
// pui16Tokens              - start of the record.
// psClassType              - receives the parsed fields and the name.
//
// Returns the address just past the record (six 16-bit slots further on).
static const uint16_t* ReadClassType(const uint32_t* pui32FirstInterfaceToken, const uint16_t* pui16Tokens, ClassType* psClassType)
{
    // The name offset occupies the first two 16-bit slots and is read as one
    // 32-bit value.
    const uint32_t nameOffset = *reinterpret_cast<const uint32_t*>(pui16Tokens);

    psClassType->ui16ID = pui16Tokens[2];
    psClassType->ui16ConstBufStride = pui16Tokens[3];
    psClassType->ui16Texture = pui16Tokens[4];
    psClassType->ui16Sampler = pui16Tokens[5];

    const char* const base = reinterpret_cast<const char*>(pui32FirstInterfaceToken);
    psClassType->name = ReadStringFromTokenStream(reinterpret_cast<const uint32_t*>(base + nameOffset));

    return pui16Tokens + 6;
}
// Reads one class instance record: a name offset assembled from two 16-bit
// tokens, followed by five 16-bit fields (ID, constant buffer, constant buffer
// offset, texture, sampler).
//
// pui32FirstInterfaceToken - base address for the name's byte offset.
// pui16Tokens              - start of the record.
// psClassInstance          - receives the parsed fields and the name.
//
// Returns the address just past the record.
static const uint16_t* ReadClassInstance(const uint32_t* pui32FirstInterfaceToken, const uint16_t* pui16Tokens, ClassInstance* psClassInstance)
{
    // Widen before shifting: a uint16_t operand is promoted to (signed) int, so
    // shifting a value >= 0x8000 left by 16 would overflow it.
    // NOTE(review): the first half-word is used as the HIGH 16 bits here, while
    // ReadClassType loads its name offset with a single 32-bit read. Confirm
    // against the chunk format that this ordering is intended.
    uint32_t ui32NameOffset = static_cast<uint32_t>(*pui16Tokens++) << 16;
    ui32NameOffset |= *pui16Tokens++;

    psClassInstance->ui16ID = *pui16Tokens++;
    psClassInstance->ui16ConstBuf = *pui16Tokens++;
    psClassInstance->ui16ConstBufOffset = *pui16Tokens++;
    psClassInstance->ui16Texture = *pui16Tokens++;
    psClassInstance->ui16Sampler = *pui16Tokens++;

    psClassInstance->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32NameOffset));

    return pui16Tokens;
}
static void ReadInterfaces(const uint32_t* pui32Tokens,
ShaderInfo* psShaderInfo)
{
uint32_t i;
uint32_t ui32StartSlot;
const uint32_t* pui32FirstInterfaceToken = pui32Tokens;
const uint32_t ui32ClassInstanceCount = *pui32Tokens++;
const uint32_t ui32ClassTypeCount = *pui32Tokens++;
const uint32_t ui32InterfaceSlotRecordCount = *pui32Tokens++;
/*const uint32_t ui32InterfaceSlotCount = * */ pui32Tokens++;
const uint32_t ui32ClassInstanceOffset = *pui32Tokens++;
const uint32_t ui32ClassTypeOffset = *pui32Tokens++;
const uint32_t ui32InterfaceSlotOffset = *pui32Tokens++;
const uint16_t* pui16ClassTypes = (const uint16_t*)((const char*)pui32FirstInterfaceToken + ui32ClassTypeOffset);
const uint16_t* pui16ClassInstances = (const uint16_t*)((const char*)pui32FirstInterfaceToken + ui32ClassInstanceOffset);
const uint32_t* pui32InterfaceSlots = (const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32InterfaceSlotOffset);
const uint32_t* pui32InterfaceSlotTokens = pui32InterfaceSlots;
ClassType* psClassTypes;
ClassInstance* psClassInstances;
psShaderInfo->psClassTypes.clear();
psShaderInfo->psClassTypes.resize(ui32ClassTypeCount);
psClassTypes = &psShaderInfo->psClassTypes[0];
for(i=0; i<ui32ClassTypeCount; ++i)
{
pui16ClassTypes = ReadClassType(pui32FirstInterfaceToken, pui16ClassTypes, psClassTypes+i);
psClassTypes[i].ui16ID = (uint16_t)i;
}
psShaderInfo->psClassInstances.clear();
psShaderInfo->psClassInstances.resize(ui32ClassInstanceCount);
psClassInstances = &psShaderInfo->psClassInstances[0];
for(i=0; i<ui32ClassInstanceCount; ++i)
{
pui16ClassInstances = ReadClassInstance(pui32FirstInterfaceToken, pui16ClassInstances, psClassInstances+i);
}
//Slots map function table to $ThisPointer cbuffer variable index
ui32StartSlot = 0;
for(i=0; i<ui32InterfaceSlotRecordCount;++i)
{
uint32_t k;
const uint32_t ui32SlotSpan = *pui32InterfaceSlotTokens++;
const uint32_t ui32Count = *pui32InterfaceSlotTokens++;
const uint32_t ui32TypeIDOffset = *pui32InterfaceSlotTokens++;
const uint32_t ui32TableIDOffset = *pui32InterfaceSlotTokens++;
const uint16_t* pui16TypeID = (const uint16_t*)((const char*)pui32FirstInterfaceToken+ui32TypeIDOffset);
const uint32_t* pui32TableID = (const uint32_t*)((const char*)pui32FirstInterfaceToken+ui32TableIDOffset);
for(k=0; k < ui32Count; ++k)
{
psShaderInfo->aui32TableIDToTypeID[*pui32TableID++] = *pui16TypeID++;
}
ui32StartSlot += ui32SlotSpan;
}
}
void LoadShaderInfo(const uint32_t ui32MajorVersion,
const uint32_t ui32MinorVersion,
const ReflectionChunks* psChunks,
ShaderInfo* psInfo,
uint32_t decodeFlags)
{
const uint32_t* pui32Inputs = psChunks->pui32Inputs;
const uint32_t* pui32Inputs11 = psChunks->pui32Inputs11;
const uint32_t* pui32Resources = psChunks->pui32Resources;
const uint32_t* pui32Interfaces = psChunks->pui32Interfaces;
const uint32_t* pui32Outputs = psChunks->pui32Outputs;
const uint32_t* pui32Outputs11 = psChunks->pui32Outputs11;
const uint32_t* pui32OutputsWithStreams = psChunks->pui32OutputsWithStreams;
const uint32_t* pui32PatchConstants = psChunks->pui32PatchConstants;
const uint32_t* pui32PatchConstants11 = psChunks->pui32PatchConstants11;
psInfo->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED;
psInfo->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED;
psInfo->ui32MajorVersion = ui32MajorVersion;
psInfo->ui32MinorVersion = ui32MinorVersion;
if(pui32Inputs)
ReadInputSignatures(pui32Inputs, psInfo, 0);
if(pui32Inputs11)
ReadInputSignatures(pui32Inputs11, psInfo, 1);
if(pui32Resources)
ReadResources(pui32Resources, psInfo, decodeFlags);
if(pui32Interfaces)
ReadInterfaces(pui32Interfaces, psInfo);
if(pui32Outputs)
ReadOutputSignatures(pui32Outputs, psInfo, 0, 0);
if(pui32Outputs11)
ReadOutputSignatures(pui32Outputs11, psInfo, 1, 1);
if(pui32OutputsWithStreams)
ReadOutputSignatures(pui32OutputsWithStreams, psInfo, 0, 1);
if(pui32PatchConstants)
ReadPatchConstantSignatures(pui32PatchConstants, psInfo, 0, 0);
if (pui32PatchConstants11)
ReadPatchConstantSignatures(pui32PatchConstants11, psInfo, 1, 1);
{
uint32_t i;
for(i=0; i<psInfo->psConstantBuffers.size();++i)
{
if (psInfo->psConstantBuffers[i].name == "$ThisPointer")
{
psInfo->psThisPointerConstBuffer = &psInfo->psConstantBuffers[i];
}
}
}
}

806
src/toGLSL.cpp Normal file
View File

@ -0,0 +1,806 @@
#include <memory>
#include "internal_includes/tokens.h"
#include "internal_includes/decode.h"
#include "stdlib.h"
#include "stdio.h"
#include "bstrlib.h"
#include "internal_includes/toGLSL.h"
#include "internal_includes/toGLSLOperand.h"
#include "internal_includes/Declaration.h"
#include "internal_includes/languages.h"
#include "internal_includes/debug.h"
#include "internal_includes/HLSLccToolkit.h"
#include "internal_includes/UseDefineChains.h"
#include "internal_includes/DataTypeAnalysis.h"
#include "internal_includes/Shader.h"
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/Instruction.h"
#include "internal_includes/LoopTransform.h"
#include <algorithm>
#include <sstream>
// GLSL does not allow input and output names to clash, and the output names of
// one stage must match the input names of the next. Pick the prefixes for this
// shader from its own stage and, where the previous stage is ambiguous, from
// the stages recorded in the dependency data.
void ToGLSL::SetIOPrefixes()
{
    // Stages present in the program; zero when no dependency data is available.
    const uint32_t stages = psContext->psDependencies ? psContext->psDependencies->ui32ProgramStages : 0;
    const bool hasDomainStage = (stages & PS_FLAG_DOMAIN_SHADER) != 0;
    const bool hasGeometryStage = (stages & PS_FLAG_GEOMETRY_SHADER) != 0;

    switch (psContext->psShader->eShaderType)
    {
        case VERTEX_SHADER:
            psContext->inputPrefix = "in_";
            psContext->outputPrefix = "vs_";
            return;

        case HULL_SHADER:
            // Input always coming from vertex shader
            psContext->inputPrefix = "vs_";
            psContext->outputPrefix = "hs_";
            return;

        case DOMAIN_SHADER:
            // There's no domain shader without hull shader
            psContext->inputPrefix = "hs_";
            psContext->outputPrefix = "ds_";
            return;

        case GEOMETRY_SHADER:
            // The input depends on whether there's a tessellation shader before us
            psContext->inputPrefix = hasDomainStage ? "ds_" : "vs_";
            psContext->outputPrefix = "gs_";
            return;

        case PIXEL_SHADER:
            // The inputs can come from geom shader, domain shader or directly from vertex shader
            if (hasGeometryStage)
                psContext->inputPrefix = "gs_";
            else if (hasDomainStage)
                psContext->inputPrefix = "ds_";
            else
                psContext->inputPrefix = "vs_";
            psContext->outputPrefix = "";
            return;

        case COMPUTE_SHADER:
        default:
            // No prefixes
            psContext->inputPrefix = "";
            psContext->outputPrefix = "";
            return;
    }
}
// Appends the "#extension" lines and version-specific preamble that the shader
// needs for the selected target language.
//
// Extension lines go to psContext->extensions; precision statements and
// gl_FragCoord layout qualifiers go to the current GLSL string. Each extension
// is requested only when the target language lacks the feature natively
// (according to the Have*() helpers) and the shader uses it.
static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext)
{
    bstring glsl = *psContext->currentGLSLString;
    bstring extensions = psContext->extensions;

    Shader* const psShader = psContext->psShader;
    const auto lang = psShader->eTargetLanguage;
    const auto shaderType = psShader->eShaderType;

    const bool isES = (lang >= LANG_ES_100 && lang <= LANG_ES_310);
    const bool isES3x = (lang == LANG_ES_300 || lang == LANG_ES_310);
    const bool isPixel = (shaderType == PIXEL_SHADER);

    // True when the shader contains at least one instruction with this opcode.
    auto used = [psShader](int opcode) -> bool { return psShader->aiOpcodeUsed[opcode] != 0; };

    // Emitted near the end, because several checks below can request it.
    bool needImageLoadStore = false;

    if (psShader->ui32MajorVersion > 3 && !isES3x && !(lang >= LANG_330))
        bcatcstr(extensions,"#extension GL_ARB_shader_bit_encoding : enable\n");

    if (!HaveCompute(lang))
    {
        if (shaderType == COMPUTE_SHADER)
            bcatcstr(extensions,"#extension GL_ARB_compute_shader : enable\n");

        if (used(OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED) ||
            used(OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW) ||
            used(OPCODE_DCL_RESOURCE_STRUCTURED) ||
            used(OPCODE_DCL_RESOURCE_RAW))
        {
            bcatcstr(extensions, "#extension GL_ARB_shader_storage_buffer_object : enable\n");
        }
    }

    if (!(HaveAtomicMem(lang) && HaveAtomicCounter(lang)))
    {
        if (used(OPCODE_IMM_ATOMIC_ALLOC) ||
            used(OPCODE_IMM_ATOMIC_CONSUME) ||
            used(OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED))
        {
            bcatcstr(extensions,"#extension GL_ARB_shader_atomic_counters : enable\n");
        }
    }

    if (!HaveImageAtomics(lang))
    {
        const bool usesAtomics =
            used(OPCODE_ATOMIC_CMP_STORE) ||
            used(OPCODE_IMM_ATOMIC_AND) ||
            used(OPCODE_ATOMIC_AND) ||
            used(OPCODE_IMM_ATOMIC_IADD) ||
            used(OPCODE_ATOMIC_IADD) ||
            used(OPCODE_ATOMIC_OR) ||
            used(OPCODE_ATOMIC_XOR) ||
            used(OPCODE_ATOMIC_IMIN) ||
            used(OPCODE_ATOMIC_UMIN) ||
            used(OPCODE_IMM_ATOMIC_IMAX) ||
            used(OPCODE_IMM_ATOMIC_IMIN) ||
            used(OPCODE_IMM_ATOMIC_UMAX) ||
            used(OPCODE_IMM_ATOMIC_UMIN) ||
            used(OPCODE_IMM_ATOMIC_OR) ||
            used(OPCODE_IMM_ATOMIC_XOR) ||
            used(OPCODE_IMM_ATOMIC_EXCH) ||
            used(OPCODE_IMM_ATOMIC_CMP_EXCH);

        if (usesAtomics)
        {
            if (isES)
                bcatcstr(extensions, "#extension GL_OES_shader_image_atomic : enable\n");
            else
                needImageLoadStore = true;
        }
    }

    if (!HaveGather(lang))
    {
        if (used(OPCODE_GATHER4) ||
            used(OPCODE_GATHER4_PO_C) ||
            used(OPCODE_GATHER4_PO) ||
            used(OPCODE_GATHER4_C))
        {
            bcatcstr(extensions,"#extension GL_ARB_texture_gather : enable\n");
        }
    }

    if (!HaveGatherNonConstOffset(lang))
    {
        if (used(OPCODE_GATHER4_PO_C) || used(OPCODE_GATHER4_PO))
            bcatcstr(extensions,"#extension GL_ARB_gpu_shader5 : enable\n");
    }

    if (!HaveQueryLod(lang))
    {
        if (used(OPCODE_LOD))
            bcatcstr(extensions,"#extension GL_ARB_texture_query_lod : enable\n");
    }

    if (!HaveQueryLevels(lang))
    {
        if (used(OPCODE_RESINFO))
            bcatcstr(extensions,"#extension GL_ARB_texture_query_levels : enable\n");
    }

    if (!HaveImageLoadStore(lang))
    {
        const bool usesStores =
            used(OPCODE_STORE_UAV_TYPED) ||
            used(OPCODE_STORE_RAW) ||
            used(OPCODE_STORE_STRUCTURED);

        if (usesStores)
        {
            needImageLoadStore = true;
            bcatcstr(extensions,"#extension GL_ARB_shader_bit_encoding : enable\n");
        }
        else if (used(OPCODE_LD_UAV_TYPED) ||
                 used(OPCODE_LD_RAW) ||
                 used(OPCODE_LD_STRUCTURED))
        {
            needImageLoadStore = true;
        }
    }

    if (!HaveGeometryShaderARB(lang))
    {
        if (shaderType == GEOMETRY_SHADER)
            bcatcstr(extensions,"#extension GL_ARB_geometry_shader : enable\n");
    }

    if (isES3x && shaderType == GEOMETRY_SHADER)
    {
        bcatcstr(extensions,"#extension GL_OES_geometry_shader : enable\n");
        bcatcstr(extensions,"#extension GL_EXT_geometry_shader : enable\n");
    }

    if (isES3x && (shaderType == HULL_SHADER || shaderType == DOMAIN_SHADER))
    {
        bcatcstr(extensions,"#extension GL_OES_tessellation_shader : enable\n");
        bcatcstr(extensions,"#extension GL_EXT_tessellation_shader : enable\n");
    }

    if (needImageLoadStore)
        bcatcstr(extensions, "#extension GL_ARB_shader_image_load_store : enable\n");

    //Handle fragment shader default precision
    if (isPixel && (lang == LANG_ES_100 || isES3x))
    {
        // Float default precision is patched during runtime in GlslGpuProgramGLES.cpp:PatchupFragmentShaderText()
        // Except on Vulkan
        if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS)
            bcatcstr(glsl, "precision highp float;\n");

        // Define default int precision to highp to avoid issues on platforms that actually implement mediump
        bcatcstr(glsl, "precision highp int;\n");
    }

    if (isPixel && lang >= LANG_120 && !HaveFragmentCoordConventions(lang))
        bcatcstr(extensions,"#extension GL_ARB_fragment_coord_conventions : require\n");

    if (isPixel && lang >= LANG_150)
    {
        if (psContext->flags & HLSLCC_FLAG_ORIGIN_UPPER_LEFT)
            bcatcstr(glsl,"layout(origin_upper_left) in vec4 gl_FragCoord;\n");

        if (psContext->flags & HLSLCC_FLAG_PIXEL_CENTER_INTEGER)
            bcatcstr(glsl,"layout(pixel_center_integer) in vec4 gl_FragCoord;\n");
    }

    /*
        OpenGL 4.1 API spec:
        To use any built-in input or output in the gl_PerVertex block in separable
        program objects, shader code must redeclare that block prior to use.
    */
    /* DISABLED FOR NOW */
    /* if(psContext->psShader->eShaderType == VERTEX_SHADER && psContext->psShader->eTargetLanguage >= LANG_410)
    {
        bcatcstr(glsl, "out gl_PerVertex {\n");
        bcatcstr(glsl, "vec4 gl_Position;\n");
        bcatcstr(glsl, "float gl_PointSize;\n");
        bcatcstr(glsl, "float gl_ClipDistance[];");
        bcatcstr(glsl, "};\n");
    }*/
}
// Picks a default GLSL version from the HLSL shader model major version
// extracted from the bytecode: 5 -> LANG_430, 4 -> LANG_330, anything else ->
// LANG_120.
GLLang ChooseLanguage(Shader* psShader)
{
    const auto shaderModelMajor = psShader->ui32MajorVersion;

    if (shaderModelMajor == 5)
        return LANG_430;

    if (shaderModelMajor == 4)
        return LANG_330;

    return LANG_120;
}
// Returns the "#version" directive (newline included) for the given target
// language, or an empty string for a language with no entry below.
const char* GetVersionString(GLLang language)
{
    switch (language)
    {
        // OpenGL ES
        case LANG_ES_100: return "#version 100\n";
        case LANG_ES_300: return "#version 300 es\n";
        case LANG_ES_310: return "#version 310 es\n";

        // Desktop OpenGL
        case LANG_120:    return "#version 120\n";
        case LANG_130:    return "#version 130\n";
        case LANG_140:    return "#version 140\n";
        case LANG_150:    return "#version 150\n";
        case LANG_330:    return "#version 330\n";
        case LANG_400:    return "#version 400\n";
        case LANG_410:    return "#version 410\n";
        case LANG_420:    return "#version 420\n";
        case LANG_430:    return "#version 430\n";
        case LANG_440:    return "#version 440\n";

        default:          return "";
    }
}
// Returns the base name used for the GLSL function generated for a shader
// phase. The main phase, and any phase type not listed, yields an empty string.
static const char * GetPhaseFuncName(SHADER_PHASE_TYPE eType)
{
    switch (eType)
    {
        case HS_GLOBAL_DECL_PHASE:
            return "hs_global_decls";
        case HS_FORK_PHASE:
            return "fork_phase";
        case HS_CTRL_POINT_PHASE:
            return "control_point_phase";
        case HS_JOIN_PHASE:
            return "join_phase";
        case MAIN_PHASE:
        default:
            return "";
    }
}
// Emits the GLSL for a hull shader that has no control point phase: an in/out
// array declaration pair for every input signature, followed by a
// passthrough_ctrl_points() function that copies each input to the matching
// output for the current invocation.
//
// The position input (system value NAME_POSITION, or semantic "POS", with
// semantic index 0) gets no declaration and is copied through
// gl_in/gl_out[].gl_Position instead.
static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext)
{
    bstring glsl = psContext->glsl;
    const auto& inputSignatures = psContext->psShader->sInfo.psInputSignatures;

    // Loop-invariant: the precision qualifier depends only on the target language.
    const char* const prec = HavePrecisionQualifers(psContext->psShader->eTargetLanguage) ? "highp ": "";

    for (const ShaderInfo::InOutSignature& sig : inputSignatures)
    {
        const bool isPosition =
            (sig.eSystemValueType == NAME_POSITION || sig.semanticName == "POS") && sig.ui32SemanticIndex == 0;
        if (isPosition)
            continue;

        const uint32_t ui32NumComponents = HLSLcc::GetNumberBitsSet(sig.ui32Mask);
        const bool isVector = ui32NumComponents > 1;

        // Scalar type name, or vector type prefix (the component count is appended below).
        const char *Type;
        switch (sig.eComponentType)
        {
            default:
            case INOUT_COMPONENT_FLOAT32:
                Type = isVector ? "vec" : "float";
                break;
            case INOUT_COMPONENT_SINT32:
                Type = isVector ? "ivec" : "int";
                break;
            case INOUT_COMPONENT_UINT32:
                Type = isVector ? "uvec" : "uint";
                break;
        }

        const char* const semantic = sig.semanticName.c_str();

        psContext->AddIndentation();
        if (isVector) // TODO Precision
            bformata(glsl, "in %s%s%d %s%s%d[];\n", prec, Type, ui32NumComponents, psContext->inputPrefix, semantic, sig.ui32SemanticIndex);
        else
            bformata(glsl, "in %s%s %s%s%d[];\n", prec, Type, psContext->inputPrefix, semantic, sig.ui32SemanticIndex);

        psContext->AddIndentation();
        if (isVector) // TODO Precision
            bformata(glsl, "out %s%s%d %s%s%d[];\n", prec, Type, ui32NumComponents, psContext->outputPrefix, semantic, sig.ui32SemanticIndex);
        else
            bformata(glsl, "out %s%s %s%s%d[];\n", prec, Type, psContext->outputPrefix, semantic, sig.ui32SemanticIndex);
    }

    psContext->AddIndentation();
    bcatcstr(glsl, "void passthrough_ctrl_points()\n");
    psContext->AddIndentation();
    bcatcstr(glsl, "{\n");
    psContext->indent++;

    for (const ShaderInfo::InOutSignature& sig : inputSignatures)
    {
        const char* const semantic = sig.semanticName.c_str();

        psContext->AddIndentation();
        if ((sig.eSystemValueType == NAME_POSITION || sig.semanticName == "POS") && sig.ui32SemanticIndex == 0)
            bformata(glsl, "gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n");
        else
            bformata(glsl, "%s%s%d[gl_InvocationID] = %s%s%d[gl_InvocationID];\n", psContext->outputPrefix, semantic, sig.ui32SemanticIndex, psContext->inputPrefix, semantic, sig.ui32SemanticIndex);
    }

    psContext->indent--;
    psContext->AddIndentation();
    bcatcstr(glsl, "}\n");
}
// Stores the target language for this translator and returns it.
// LANG_DEFAULT is resolved to a concrete language with ChooseLanguage(), which
// decides from the shader being translated.
GLLang ToGLSL::SetLanguage(GLLang suggestedLanguage)
{
    language = (suggestedLanguage == LANG_DEFAULT)
        ? ChooseLanguage(psContext->psShader)
        : suggestedLanguage;

    return language;
}
bool ToGLSL::Translate()
{
bstring glsl;
uint32_t i;
Shader* psShader = psContext->psShader;
uint32_t ui32Phase;
psContext->psTranslator = this;
if (language == LANG_DEFAULT)
SetLanguage(LANG_DEFAULT);
SetIOPrefixes();
psShader->ExpandSWAPCs();
psShader->ForcePositionToHighp();
psShader->AnalyzeIOOverlap();
psShader->FindUnusedGlobals(psContext->flags);
psContext->indent = 0;
glsl = bfromcstralloc (1024 * 10, "\n");
bstring extensions = bfromcstralloc (1024 * 10, GetVersionString(language));
psContext->extensions = extensions;
psContext->glsl = glsl;
for(i=0; i<psShader->asPhases.size();++i)
{
psShader->asPhases[i].postShaderCode = bfromcstralloc (1024 * 5, "");
psShader->asPhases[i].earlyMain = bfromcstralloc(1024 * 5, "");
}
psContext->currentGLSLString = &glsl;
psShader->eTargetLanguage = language;
psContext->currentPhase = MAIN_PHASE;
if (psShader->extensions)
{
if (psShader->extensions->ARB_explicit_attrib_location)
bcatcstr(extensions, "#extension GL_ARB_explicit_attrib_location : require\n");
if (psShader->extensions->ARB_explicit_uniform_location)
bcatcstr(extensions, "#extension GL_ARB_explicit_uniform_location : require\n");
if (psShader->extensions->ARB_shading_language_420pack)
bcatcstr(extensions, "#extension GL_ARB_shading_language_420pack : require\n");
}
psContext->ClearDependencyData();
AddVersionDependentCode(psContext);
psShader->PrepareStructuredBufferBindingSlots();
for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++)
{
ShaderPhase &phase = psShader->asPhases[ui32Phase];
phase.UnvectorizeImmMoves();
psContext->DoDataTypeAnalysis(&phase);
phase.ResolveUAVProperties();
psShader->ResolveStructuredBufferBindingSlots(&phase);
phase.PruneConstArrays();
}
psShader->PruneTempRegisters();
for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++)
{
// Loop transform can only be done after the temps have been pruned
ShaderPhase &phase = psShader->asPhases[ui32Phase];
HLSLcc::DoLoopTransform(phase);
}
//Special case. Can have multiple phases.
if(psShader->eShaderType == HULL_SHADER)
{
const SHADER_PHASE_TYPE ePhaseFuncCallOrder[3] = { HS_CTRL_POINT_PHASE, HS_FORK_PHASE, HS_JOIN_PHASE };
uint32_t ui32PhaseCallIndex;
int perPatchSectionAdded = 0;
int hasControlPointPhase = 0;
psShader->ConsolidateHullTempVars();
// Find out if we have a passthrough hull shader
for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++)
{
if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE)
hasControlPointPhase = 1;
}
// Phase 1 is always the global decls phase, no instructions
for(i=0; i < psShader->asPhases[1].psDecl.size(); ++i)
{
TranslateDeclaration(&psShader->asPhases[1].psDecl[i]);
}
if (hasControlPointPhase == 0)
{
DoHullShaderPassthrough(psContext);
}
for(ui32Phase=2; ui32Phase<psShader->asPhases.size(); ui32Phase++)
{
ShaderPhase *psPhase = &psShader->asPhases[ui32Phase];
psContext->currentPhase = ui32Phase;
#ifdef _DEBUG
bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase));
#endif
for (i = 0; i < psPhase->psDecl.size(); ++i)
{
TranslateDeclaration(&psPhase->psDecl[i]);
}
bformata(glsl, "void %s%d(int phaseInstanceID)\n{\n", GetPhaseFuncName(psPhase->ePhase), ui32Phase);
psContext->indent++;
if (psPhase->psInst.size() > 0)
{
//The minus one here is remove the return statement at end of phases.
//We don't want to translate that, we'll just end the function body.
ASSERT(psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_RET);
for (i = 0; i < psPhase->psInst.size() - 1; ++i)
{
TranslateInstruction(&psPhase->psInst[i]);
}
}
psContext->indent--;
bcatcstr(glsl, "}\n");
}
bcatcstr(glsl, "void main()\n{\n");
psContext->indent++;
// There are cases when there are no control point phases and we have to do passthrough
if (hasControlPointPhase == 0)
{
// Passthrough control point phase, run the rest only once per patch
psContext->AddIndentation();
bcatcstr(glsl, "passthrough_ctrl_points();\n");
psContext->AddIndentation();
bcatcstr(glsl, "barrier();\n");
psContext->AddIndentation();
bcatcstr(glsl, "if (gl_InvocationID == 0)\n");
psContext->AddIndentation();
bcatcstr(glsl, "{\n");
psContext->indent++;
perPatchSectionAdded = 1;
}
for(ui32PhaseCallIndex=0; ui32PhaseCallIndex<3; ui32PhaseCallIndex++)
{
for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++)
{
uint32_t i;
ShaderPhase *psPhase = &psShader->asPhases[ui32Phase];
if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex])
continue;
if (psPhase->earlyMain->slen > 1)
{
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(glsl, "//--- Start Early Main ---\n");
#endif
bconcat(glsl, psPhase->earlyMain);
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(glsl, "//--- End Early Main ---\n");
#endif
}
for (i = 0; i < psPhase->ui32InstanceCount; i++)
{
psContext->AddIndentation();
bformata(glsl, "%s%d(%d);\n", GetPhaseFuncName(psShader->asPhases[ui32Phase].ePhase), ui32Phase, i);
}
if (psPhase->hasPostShaderCode)
{
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(glsl, "//--- Post shader code ---\n");
#endif
bconcat(glsl, psPhase->postShaderCode);
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(glsl, "//--- End post shader code ---\n");
#endif
}
if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE)
{
// We're done printing control point phase, run the rest only once per patch
psContext->AddIndentation();
bcatcstr(glsl, "barrier();\n");
psContext->AddIndentation();
bcatcstr(glsl, "if (gl_InvocationID == 0)\n");
psContext->AddIndentation();
bcatcstr(glsl, "{\n");
psContext->indent++;
perPatchSectionAdded = 1;
}
}
}
if (perPatchSectionAdded != 0)
{
psContext->indent--;
psContext->AddIndentation();
bcatcstr(glsl, "}\n");
}
psContext->indent--;
bcatcstr(glsl, "}\n");
// Concat extensions and glsl for the final shader code.
bconcat(extensions, glsl);
bdestroy(glsl);
psContext->glsl = extensions;
glsl = NULL;
if(psContext->psDependencies)
{
//Save partitioning and primitive type for use by domain shader.
psContext->psDependencies->eTessOutPrim = psShader->sInfo.eTessOutPrim;
psContext->psDependencies->eTessPartitioning = psShader->sInfo.eTessPartitioning;
}
return true;
}
if(psShader->eShaderType == DOMAIN_SHADER && psContext->psDependencies)
{
//Load partitioning and primitive type from hull shader.
switch(psContext->psDependencies->eTessOutPrim)
{
case TESSELLATOR_OUTPUT_TRIANGLE_CCW:
{
bcatcstr(glsl, "layout(ccw) in;\n");
break;
}
case TESSELLATOR_OUTPUT_TRIANGLE_CW:
{
bcatcstr(glsl, "layout(cw) in;\n");
break;
}
case TESSELLATOR_OUTPUT_POINT:
{
bcatcstr(glsl, "layout(point_mode) in;\n");
break;
}
default:
{
break;
}
}
switch(psContext->psDependencies->eTessPartitioning)
{
case TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
{
bcatcstr(glsl, "layout(fractional_odd_spacing) in;\n");
break;
}
case TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
{
bcatcstr(glsl, "layout(fractional_even_spacing) in;\n");
break;
}
default:
{
break;
}
}
}
for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i)
{
TranslateDeclaration(&psShader->asPhases[0].psDecl[i]);
}
bcatcstr(glsl, "void main()\n{\n");
psContext->indent++;
if (psContext->psShader->asPhases[0].earlyMain->slen > 1)
{
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(glsl, "//--- Start Early Main ---\n");
#endif
bconcat(glsl, psContext->psShader->asPhases[0].earlyMain);
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(glsl, "//--- End Early Main ---\n");
#endif
}
for(i=0; i < psShader->asPhases[0].psInst.size(); ++i)
{
TranslateInstruction(&psShader->asPhases[0].psInst[i]);
}
psContext->indent--;
bcatcstr(glsl, "}\n");
// Concat extensions and glsl for the final shader code.
bconcat(extensions, glsl);
bdestroy(glsl);
psContext->glsl = extensions;
glsl = NULL;
return true;
}

2994
src/toGLSLDeclaration.cpp Normal file

File diff suppressed because it is too large Load Diff

4127
src/toGLSLInstruction.cpp Normal file

File diff suppressed because it is too large Load Diff

1616
src/toGLSLOperand.cpp Normal file

File diff suppressed because it is too large Load Diff

265
src/toMetal.cpp Normal file
View File

@ -0,0 +1,265 @@
#include "internal_includes/toMetal.h"
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/Shader.h"
#include "internal_includes/debug.h"
#include "internal_includes/Declaration.h"
#include "internal_includes/toGLSL.h"
#include "internal_includes/LoopTransform.h"
#include "internal_includes/HLSLccToolkit.h"
#include <algorithm>
// Appends the declaration of struct `sname` to `glsl`, preceded (recursively) by the
// declarations of every struct named in its m_Dependencies list.
// A definition whose m_IsPrinted flag is already set is skipped, so each struct is
// emitted at most once no matter how many other structs depend on it.
static void PrintStructDeclaration(HLSLCrossCompilerContext *psContext, bstring glsl, std::string &sname, StructDefinitions &defs)
{
    StructDefinition &def = defs[sname];
    if (def.m_IsPrinted)
        return;

    // Set the flag before recursing so the struct is not revisited through its dependencies.
    def.m_IsPrinted = true;

    // Dependencies first: they have to be declared before the struct that uses them.
    for (std::string &dependency : def.m_Dependencies)
        PrintStructDeclaration(psContext, glsl, dependency, defs);

    bformata(glsl, "struct %s\n{\n", sname.c_str());
    psContext->indent++;
    for (std::string &member : def.m_Members)
    {
        // One member per line, each terminated with a semicolon.
        psContext->AddIndentation();
        bcatcstr(glsl, member.c_str());
        bcatcstr(glsl, ";\n");
    }
    psContext->indent--;
    bcatcstr(glsl, "};\n\n");
}
// Prints, into the string currently selected by psContext->currentGLSLString, every struct
// listed as a dependency of the definition stored under the empty-string key of `defs`
// (the entry Translate() uses to collect the entry point's arguments).
void ToMetal::PrintStructDeclarations(StructDefinitions &defs)
{
    bstring out = *psContext->currentGLSLString;
    StructDefinition &root = defs[""];

    for (std::string &structName : root.m_Dependencies)
        PrintStructDeclaration(psContext, out, structName, defs);
}
bool ToMetal::Translate()
{
bstring glsl;
uint32_t i;
Shader* psShader = psContext->psShader;
psContext->psTranslator = this;
SetIOPrefixes();
psShader->ExpandSWAPCs();
psShader->ForcePositionToHighp();
psShader->AnalyzeIOOverlap();
psShader->FindUnusedGlobals(psContext->flags);
psContext->indent = 0;
glsl = bfromcstralloc(1024 * 10, "");
bstring bodyglsl = bfromcstralloc(1024 * 10, "");
psContext->glsl = glsl;
for (i = 0; i < psShader->asPhases.size(); ++i)
{
psShader->asPhases[i].postShaderCode = bfromcstralloc(1024 * 5, "");
psShader->asPhases[i].earlyMain = bfromcstralloc(1024 * 5, "");
}
psContext->currentGLSLString = &glsl;
psShader->eTargetLanguage = LANG_METAL;
psShader->extensions = NULL;
psContext->currentPhase = MAIN_PHASE;
psContext->ClearDependencyData();
ClampPartialPrecisions();
psShader->PrepareStructuredBufferBindingSlots();
ShaderPhase &phase = psShader->asPhases[0];
phase.UnvectorizeImmMoves();
psContext->DoDataTypeAnalysis(&phase);
phase.ResolveUAVProperties();
psShader->ResolveStructuredBufferBindingSlots(&phase);
phase.PruneConstArrays();
HLSLcc::DoLoopTransform(phase);
psShader->PruneTempRegisters();
bcatcstr(glsl, "#include <metal_stdlib>\n#include <metal_texture>\nusing namespace metal;\n");
for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i)
{
TranslateDeclaration(&psShader->asPhases[0].psDecl[i]);
}
if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0)
{
m_StructDefinitions[""].m_Members.push_back(GetInputStructName() + " input [[ stage_in ]]");
m_StructDefinitions[""].m_Dependencies.push_back(GetInputStructName());
}
if (psShader->eShaderType != COMPUTE_SHADER)
{
if (m_StructDefinitions[GetOutputStructName()].m_Members.size() > 0)
{
m_StructDefinitions[""].m_Dependencies.push_back(GetOutputStructName());
}
}
PrintStructDeclarations(m_StructDefinitions);
psContext->currentGLSLString = &bodyglsl;
switch (psShader->eShaderType)
{
case VERTEX_SHADER:
bcatcstr(bodyglsl, "vertex Mtl_VertexOut xlatMtlMain(\n");
break;
case PIXEL_SHADER:
bcatcstr(bodyglsl, "fragment Mtl_FragmentOut xlatMtlMain(\n");
break;
case COMPUTE_SHADER:
bcatcstr(bodyglsl, "kernel void computeMain(\n");
break;
default:
// Not supported
ASSERT(0);
return false;
}
psContext->indent++;
for (auto itr = m_StructDefinitions[""].m_Members.begin(); itr != m_StructDefinitions[""].m_Members.end(); itr++)
{
psContext->AddIndentation();
bcatcstr(bodyglsl, itr->c_str());
if (itr + 1 != m_StructDefinitions[""].m_Members.end())
bcatcstr(bodyglsl, ",\n");
}
bcatcstr(bodyglsl, ")\n{\n");
if (psShader->eShaderType != COMPUTE_SHADER)
{
psContext->AddIndentation();
bcatcstr(bodyglsl, GetOutputStructName().c_str());
bcatcstr(bodyglsl, " output;\n");
}
if (psContext->psShader->asPhases[0].earlyMain->slen > 1)
{
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(bodyglsl, "//--- Start Early Main ---\n");
#endif
bconcat(bodyglsl, psContext->psShader->asPhases[0].earlyMain);
#ifdef _DEBUG
psContext->AddIndentation();
bcatcstr(bodyglsl, "//--- End Early Main ---\n");
#endif
}
for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i)
{
TranslateInstruction(&psShader->asPhases[0].psInst[i]);
}
psContext->indent--;
bcatcstr(bodyglsl, "}\n");
psContext->currentGLSLString = &glsl;
bcatcstr(glsl, m_ExtraGlobalDefinitions.c_str());
// Print out extra functions we generated
std::for_each(m_FunctionDefinitions.begin(), m_FunctionDefinitions.end(), [&glsl](const FunctionDefinitions::value_type &p)
{
bcatcstr(glsl, p.second.c_str());
bcatcstr(glsl, "\n");
});
// And then the actual function body
bconcat(glsl, bodyglsl);
bdestroy(bodyglsl);
return true;
}
// Registers a generated helper function under `name`. The first registration wins:
// if `name` is already present, the existing body is kept and `body` is ignored.
void ToMetal::DeclareExtraFunction(const std::string &name, const std::string &body)
{
    const bool alreadyDeclared = m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end();
    if (!alreadyDeclared)
    {
        m_FunctionDefinitions.insert(std::make_pair(name, body));
    }
}
// Returns the name of the output struct for the current shader type.
// Only vertex and pixel shaders have one; any other type asserts and yields an empty string.
std::string ToMetal::GetOutputStructName() const
{
    const auto shaderType = psContext->psShader->eShaderType;

    if (shaderType == VERTEX_SHADER)
        return "Mtl_VertexOut";
    if (shaderType == PIXEL_SHADER)
        return "Mtl_FragmentOut";

    ASSERT(0);
    return "";
}
// Returns the name of the input struct for the current shader type
// (vertex, pixel or compute); any other type asserts and yields an empty string.
std::string ToMetal::GetInputStructName() const
{
    const auto shaderType = psContext->psShader->eShaderType;

    if (shaderType == VERTEX_SHADER)
        return "Mtl_VertexIn";
    if (shaderType == PIXEL_SHADER)
        return "Mtl_FragmentIn";
    if (shaderType == COMPUTE_SHADER)
        return "Mtl_KernelIn";

    ASSERT(0);
    return "";
}
// Sets the prefixes stored in psContext->inputPrefix / outputPrefix for the current shader type:
// vertex and pixel shaders use "input." and "output.", compute shaders use empty prefixes.
// Any other shader type asserts and leaves the prefixes untouched.
void ToMetal::SetIOPrefixes()
{
    const auto shaderType = psContext->psShader->eShaderType;

    if (shaderType == VERTEX_SHADER || shaderType == PIXEL_SHADER)
    {
        psContext->inputPrefix = "input.";
        psContext->outputPrefix = "output.";
    }
    else if (shaderType == COMPUTE_SHADER)
    {
        psContext->inputPrefix = "";
        psContext->outputPrefix = "";
    }
    else
    {
        ASSERT(0);
    }
}
void ToMetal::ClampPartialPrecisions()
{
HLSLcc::ForEachOperand(psContext->psShader->asPhases[0].psInst.begin(), psContext->psShader->asPhases[0].psInst.end(), FEO_FLAG_ALL,
[](std::vector<Instruction>::iterator &i, Operand *o, uint32_t flags)
{
if (o->eMinPrecision == OPERAND_MIN_PRECISION_FLOAT_2_8)
o->eMinPrecision = OPERAND_MIN_PRECISION_FLOAT_16;
});
}

1979
src/toMetalDeclaration.cpp Normal file

File diff suppressed because it is too large Load Diff

3731
src/toMetalInstruction.cpp Normal file

File diff suppressed because it is too large Load Diff

1120
src/toMetalOperand.cpp Normal file

File diff suppressed because it is too large Load Diff