mirror of https://github.com/microsoft/DirectXMath synced 2024-09-19 14:49:54 +00:00

CHPE platform support

This commit is contained in:
Chuck Walbourn 2017-01-13 13:52:41 -08:00
parent c887a52207
commit f7954ad6ed
4 changed files with 40 additions and 40 deletions
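Note: CHPE ("Compiled Hybrid Portable Executable") is the Windows-on-ARM mode in which x86 code is compiled to run natively on ARM64; Visual C++ identifies it with the predefined macro _M_HYBRID_X86_ARM64. This commit teaches DirectXMath to treat that configuration as an ARM NEON target rather than an SSE one, and to exclude it from __vectorcall. A minimal sketch (not part of the commit) of how a client could verify which path the header selected:

// Hypothetical check, assuming DirectXMath is on the include path; prints
// which intrinsics family DirectXMath.h chose for the current target.
#include <cstdio>
#include <DirectXMath.h>

int main()
{
#if defined(_XM_ARM_NEON_INTRINSICS_)
    // Chosen for _M_ARM, _M_ARM64, and (after this commit) _M_HYBRID_X86_ARM64.
    std::puts("ARM NEON intrinsics");
#elif defined(_XM_SSE_INTRINSICS_)
    std::puts("SSE intrinsics");
#else
    std::puts("no intrinsics (_XM_NO_INTRINSICS_)");
#endif
    return 0;
}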

View File

@@ -5,7 +5,7 @@
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
-// 
+//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
@@ -23,7 +23,7 @@
#error DirectX Math requires Visual C++ 2013 or later.
#endif
-#if defined(_MSC_VER) && !defined(_M_ARM) && !defined(_M_ARM64) && (!_MANAGED) && (!_M_CEE) && (!defined(_M_IX86_FP) || (_M_IX86_FP > 1)) && !defined(_XM_NO_INTRINSICS_) && !defined(_XM_VECTORCALL_)
+#if defined(_MSC_VER) && !defined(_M_ARM) && !defined(_M_ARM64) && !defined(_M_HYBRID_X86_ARM64) && (!_MANAGED) && (!_M_CEE) && (!defined(_M_IX86_FP) || (_M_IX86_FP > 1)) && !defined(_XM_NO_INTRINSICS_) && !defined(_XM_VECTORCALL_)
#define _XM_VECTORCALL_ 1
#endif
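For context, _XM_VECTORCALL_ feeds the XM_CALLCONV macro used on every declaration below; elsewhere in the header (quoted from memory, not part of this diff) it reads roughly:

#if _XM_VECTORCALL_
#define XM_CALLCONV __vectorcall
#else
#define XM_CALLCONV __fastcall
#endif

__vectorcall is an x86/x64 convention, so CHPE targets are excluded here just as ARM and ARM64 already were.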
@@ -74,9 +74,9 @@
#endif
#if !defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
-#if defined(_M_IX86) || defined(_M_X64)
+#if (defined(_M_IX86) || defined(_M_X64)) && !defined(_M_HYBRID_X86_ARM64)
#define _XM_SSE_INTRINSICS_
-#elif defined(_M_ARM) || defined(_M_ARM64)
+#elif defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
#define _XM_ARM_NEON_INTRINSICS_
#elif !defined(_XM_NO_INTRINSICS_)
#error DirectX Math does not support this target
@@ -115,7 +115,7 @@
#endif
#elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
@@ -282,7 +282,7 @@ struct __vector4
#endif // _XM_NO_INTRINSICS_
//------------------------------------------------------------------------------
-// Vector intrinsic: Four 32 bit floating point components aligned on a 16 byte 
+// Vector intrinsic: Four 32 bit floating point components aligned on a 16 byte
// boundary and mapped to hardware vector registers
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
typedef __m128 XMVECTOR;
@@ -300,14 +300,14 @@ typedef const XMVECTOR& FXMVECTOR;
#endif
// Fix-up for (4th) XMVECTOR parameter to pass in-register for ARM, ARM64, and x64 vector call; by reference otherwise
-#if ( defined(_M_ARM) || defined(_M_ARM64) || (_XM_VECTORCALL_ && !defined(_M_IX86) ) ) && !defined(_XM_NO_INTRINSICS_)
+#if ( defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || (_XM_VECTORCALL_ && !defined(_M_IX86) ) ) && !defined(_XM_NO_INTRINSICS_)
typedef const XMVECTOR GXMVECTOR;
#else
typedef const XMVECTOR& GXMVECTOR;
#endif
// Fix-up for (5th & 6th) XMVECTOR parameter to pass in-register for ARM64 and vector call; by reference otherwise
-#if ( defined(_M_ARM64) || _XM_VECTORCALL_ ) && !defined(_XM_NO_INTRINSICS_)
+#if ( defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || _XM_VECTORCALL_ ) && !defined(_XM_NO_INTRINSICS_)
typedef const XMVECTOR HXMVECTOR;
#else
typedef const XMVECTOR& HXMVECTOR;
@@ -407,7 +407,7 @@ XMVECTOR XM_CALLCONV operator/ (FXMVECTOR V, float S);
struct XMMATRIX;
// Fix-up for (1st) XMMATRIX parameter to pass in-register for ARM64 and vector call; by reference otherwise
-#if ( defined(_M_ARM64) || _XM_VECTORCALL_ ) && !defined(_XM_NO_INTRINSICS_)
+#if ( defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || _XM_VECTORCALL_ ) && !defined(_XM_NO_INTRINSICS_)
typedef const XMMATRIX FXMMATRIX;
#else
typedef const XMMATRIX& FXMMATRIX;
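These typedefs implement DirectXMath's in-register parameter convention: the first three XMVECTOR arguments of a function are declared FXMVECTOR, the fourth GXMVECTOR, the fifth and sixth HXMVECTOR, and further ones CXMVECTOR; the first XMMATRIX is FXMMATRIX and the rest CXMMATRIX. A hypothetical function following that convention (illustration only, not from this commit):

// Average four vectors; the fourth parameter is GXMVECTOR so that it still
// passes in-register on ARM, ARM64, CHPE, and __vectorcall x64 builds.
inline XMVECTOR XM_CALLCONV Average4(FXMVECTOR a, FXMVECTOR b, FXMVECTOR c, GXMVECTOR d)
{
    XMVECTOR s = XMVectorAdd(XMVectorAdd(a, b), XMVectorAdd(c, d));
    return XMVectorScale(s, 0.25f);
}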
@@ -1174,21 +1174,21 @@ XMFLOAT3* XM_CALLCONV XMVector3TransformNormalStream(_Out_writes_bytes_(si
_In_ size_t OutputStride,
_In_reads_bytes_(sizeof(XMFLOAT3)+InputStride*(VectorCount-1)) const XMFLOAT3* pInputStream,
_In_ size_t InputStride, _In_ size_t VectorCount, _In_ FXMMATRIX M);
-XMVECTOR XM_CALLCONV XMVector3Project(FXMVECTOR V, float ViewportX, float ViewportY, float ViewportWidth, float ViewportHeight, float ViewportMinZ, float ViewportMaxZ, 
+XMVECTOR XM_CALLCONV XMVector3Project(FXMVECTOR V, float ViewportX, float ViewportY, float ViewportWidth, float ViewportHeight, float ViewportMinZ, float ViewportMaxZ,
FXMMATRIX Projection, CXMMATRIX View, CXMMATRIX World);
XMFLOAT3* XM_CALLCONV XMVector3ProjectStream(_Out_writes_bytes_(sizeof(XMFLOAT3)+OutputStride*(VectorCount-1)) XMFLOAT3* pOutputStream,
_In_ size_t OutputStride,
_In_reads_bytes_(sizeof(XMFLOAT3)+InputStride*(VectorCount-1)) const XMFLOAT3* pInputStream,
-_In_ size_t InputStride, _In_ size_t VectorCount, 
-_In_ float ViewportX, _In_ float ViewportY, _In_ float ViewportWidth, _In_ float ViewportHeight, _In_ float ViewportMinZ, _In_ float ViewportMaxZ, 
+_In_ size_t InputStride, _In_ size_t VectorCount,
+_In_ float ViewportX, _In_ float ViewportY, _In_ float ViewportWidth, _In_ float ViewportHeight, _In_ float ViewportMinZ, _In_ float ViewportMaxZ,
_In_ FXMMATRIX Projection, _In_ CXMMATRIX View, _In_ CXMMATRIX World);
-XMVECTOR XM_CALLCONV XMVector3Unproject(FXMVECTOR V, float ViewportX, float ViewportY, float ViewportWidth, float ViewportHeight, float ViewportMinZ, float ViewportMaxZ, 
+XMVECTOR XM_CALLCONV XMVector3Unproject(FXMVECTOR V, float ViewportX, float ViewportY, float ViewportWidth, float ViewportHeight, float ViewportMinZ, float ViewportMaxZ,
FXMMATRIX Projection, CXMMATRIX View, CXMMATRIX World);
XMFLOAT3* XM_CALLCONV XMVector3UnprojectStream(_Out_writes_bytes_(sizeof(XMFLOAT3)+OutputStride*(VectorCount-1)) XMFLOAT3* pOutputStream,
_In_ size_t OutputStride,
_In_reads_bytes_(sizeof(XMFLOAT3)+InputStride*(VectorCount-1)) const XMFLOAT3* pInputStream,
-_In_ size_t InputStride, _In_ size_t VectorCount, 
-_In_ float ViewportX, _In_ float ViewportY, _In_ float ViewportWidth, _In_ float ViewportHeight, _In_ float ViewportMinZ, _In_ float ViewportMaxZ, 
+_In_ size_t InputStride, _In_ size_t VectorCount,
+_In_ float ViewportX, _In_ float ViewportY, _In_ float ViewportWidth, _In_ float ViewportHeight, _In_ float ViewportMinZ, _In_ float ViewportMaxZ,
_In_ FXMMATRIX Projection, _In_ CXMMATRIX View, _In_ CXMMATRIX World);
/****************************************************************************
@@ -1274,9 +1274,9 @@ XMMATRIX XM_CALLCONV XMMatrixRotationRollPitchYawFromVector(FXMVECTOR Ang
XMMATRIX XM_CALLCONV XMMatrixRotationNormal(FXMVECTOR NormalAxis, float Angle);
XMMATRIX XM_CALLCONV XMMatrixRotationAxis(FXMVECTOR Axis, float Angle);
XMMATRIX XM_CALLCONV XMMatrixRotationQuaternion(FXMVECTOR Quaternion);
-XMMATRIX XM_CALLCONV XMMatrixTransformation2D(FXMVECTOR ScalingOrigin, float ScalingOrientation, FXMVECTOR Scaling, 
+XMMATRIX XM_CALLCONV XMMatrixTransformation2D(FXMVECTOR ScalingOrigin, float ScalingOrientation, FXMVECTOR Scaling,
FXMVECTOR RotationOrigin, float Rotation, GXMVECTOR Translation);
-XMMATRIX XM_CALLCONV XMMatrixTransformation(FXMVECTOR ScalingOrigin, FXMVECTOR ScalingOrientationQuaternion, FXMVECTOR Scaling, 
+XMMATRIX XM_CALLCONV XMMatrixTransformation(FXMVECTOR ScalingOrigin, FXMVECTOR ScalingOrientationQuaternion, FXMVECTOR Scaling,
GXMVECTOR RotationOrigin, HXMVECTOR RotationQuaternion, HXMVECTOR Translation);
XMMATRIX XM_CALLCONV XMMatrixAffineTransformation2D(FXMVECTOR Scaling, FXMVECTOR RotationOrigin, float Rotation, FXMVECTOR Translation);
XMMATRIX XM_CALLCONV XMMatrixAffineTransformation(FXMVECTOR Scaling, FXMVECTOR RotationOrigin, FXMVECTOR RotationQuaternion, GXMVECTOR Translation);
@@ -1711,12 +1711,12 @@ template<uint32_t VSLeftRotateElements, uint32_t Select0, uint32_t Select1, uint
*
****************************************************************************/
-// The purpose of the following global constants is to prevent redundant 
+// The purpose of the following global constants is to prevent redundant
// reloading of the constants when they are referenced by more than one
// separate inline math routine called within the same function. Declaring
// a constant locally within a routine is sufficient to prevent redundant
// reloads of that constant when that single routine is called multiple
-// times in a function, but if the constant is used (and declared) in a 
+// times in a function, but if the constant is used (and declared) in a
// separate math routine it would be reloaded.
#ifndef XMGLOBALCONST

View File

@@ -2996,7 +2996,7 @@ inline XMMATRIX& XMMATRIX::operator/= (float S)
r[3] = XMVectorDivide( r[3], vS );
return *this;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
float32x4_t vS = vdupq_n_f32( S );
r[0] = vdivq_f32( r[0], vS );
r[1] = vdivq_f32( r[1], vS );
@@ -3083,7 +3083,7 @@ inline XMMATRIX XMMATRIX::operator/ (float S) const
R.r[3] = XMVectorDivide( r[3], vS );
return R;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
float32x4_t vS = vdupq_n_f32( S );
XMMATRIX R;
R.r[0] = vdivq_f32( r[0], vS );
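Usage sketch (not from the commit): a scalar matrix divide, which on ARM64 and now on CHPE takes the vdivq_f32 row-by-row path above instead of the reciprocal-estimate fallback:

XMMATRIX M = XMMatrixScaling(2.0f, 4.0f, 8.0f);
M /= 2.0f;   // each row divided lane-wise by vdupq_n_f32(2.0f)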

View File

@@ -2352,7 +2352,7 @@ inline XMVECTOR XM_CALLCONV XMVectorRound
return Result;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
return vrndnq_f32(V);
#else
uint32x4_t sign = vandq_u32( V, g_XMNegativeZero );
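vrndnq_f32 is the ARMv8 round-to-nearest-even instruction; the #else branch reconstructs the same behavior by hand for 32-bit ARM. A standalone NEON illustration (assumption: plain intrinsics, not DirectXMath code):

const float src[4] = { 0.5f, 1.5f, -0.5f, 2.3f };
float32x4_t v = vld1q_f32(src);
float32x4_t r = vrndnq_f32(v);   // { 0.0f, 2.0f, -0.0f, 2.0f }: ties round to even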
@@ -2416,7 +2416,7 @@ inline XMVECTOR XM_CALLCONV XMVectorTruncate
return Result;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
return vrndq_f32(V);
#else
float32x4_t vTest = vabsq_f32( V );
@@ -2465,7 +2465,7 @@ inline XMVECTOR XM_CALLCONV XMVectorFloor
Result.vector4_f32[3] = floorf( V.vector4_f32[3] );
return Result;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
return vrndmq_f32(V);
#else
float32x4_t vTest = vabsq_f32( V );
@@ -2518,7 +2518,7 @@ inline XMVECTOR XM_CALLCONV XMVectorCeiling
Result.vector4_f32[3] = ceilf( V.vector4_f32[3] );
return Result;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
return vrndpq_f32(V);
#else
float32x4_t vTest = vabsq_f32( V );
@@ -2814,7 +2814,7 @@ inline XMVECTOR XM_CALLCONV XMVectorSum
return Result;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
XMVECTOR vTemp = vpaddq_f32(V, V);
return vpaddq_f32(vTemp,vTemp);
#else
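Worked illustration (not from the commit) of the two pairwise adds above: for V = {a, b, c, d}, vpaddq_f32(V, V) yields {a+b, c+d, a+b, c+d}, and a second vpaddq_f32 on that result yields a+b+c+d replicated in all four lanes, matching XMVectorSum's contract of broadcasting the horizontal sum. vpaddq_f32 is ARM64-only, which is why the #else branch handles 32-bit ARM differently.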
@@ -3039,7 +3039,7 @@ inline XMVECTOR XM_CALLCONV XMVectorDivide
Result.vector4_f32[3] = V1.vector4_f32[3] / V2.vector4_f32[3];
return Result;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
return vdivq_f32( V1, V2 );
#else
// 2 iterations of Newton-Raphson refinement of reciprocal
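The fallback mentioned in that comment refines the rough reciprocal estimate from vrecpeq_f32; each vrecpsq_f32(b, x) computes 2 - b*x, so multiplying it back in roughly doubles the number of accurate bits per step. A standalone sketch of the pattern (assumption: plain NEON intrinsics mirroring, not copying, the library's fallback):

// Approximate V1 / V2 without a vector divide instruction (pre-ARMv8 NEON).
inline float32x4_t DivideNewtonRaphson(float32x4_t v1, float32x4_t v2)
{
    float32x4_t recip = vrecpeq_f32(v2);                // rough estimate of 1/v2
    recip = vmulq_f32(vrecpsq_f32(v2, recip), recip);   // refinement step 1
    recip = vmulq_f32(vrecpsq_f32(v2, recip), recip);   // refinement step 2
    return vmulq_f32(v1, recip);                        // v1 * (1/v2)
}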
@@ -3138,7 +3138,7 @@ inline XMVECTOR XM_CALLCONV XMVectorReciprocal
Result.vector4_f32[3] = 1.f / V.vector4_f32[3];
return Result;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
float32x4_t one = vdupq_n_f32(1.0f);
return vdivq_f32(one,V);
#else
@@ -8003,7 +8003,7 @@ inline XMFLOAT2* XM_CALLCONV XMVector2TransformCoordStream
__prefetch( pInputVector+(XM_CACHE_LINE_SIZE*3) );
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
V.val[0] = vdivq_f32( vResult0, W );
V.val[1] = vdivq_f32( vResult1, W );
#else
@@ -8037,7 +8037,7 @@ inline XMFLOAT2* XM_CALLCONV XMVector2TransformCoordStream
V = vget_high_f32( vResult );
float32x2_t W = vdup_lane_f32( V, 1 );
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
V = vget_low_f32( vResult );
V = vdiv_f32( V, W );
#else
@@ -10541,7 +10541,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3TransformCoordStream
__prefetch( pInputVector+(XM_CACHE_LINE_SIZE*5) );
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
V.val[0] = vdivq_f32( vResult0, W );
V.val[1] = vdivq_f32( vResult1, W );
V.val[2] = vdivq_f32( vResult2, W );
@@ -10580,7 +10580,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3TransformCoordStream
VH = vget_high_f32(vResult);
XMVECTOR W = vdupq_lane_f32( VH, 1 );
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
vResult = vdivq_f32( vResult, W );
#else
// 2 iterations of Newton-Raphson refinement of reciprocal for W
@@ -11501,7 +11501,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3ProjectStream
__prefetch( pInputVector+(XM_CACHE_LINE_SIZE*5) );
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
vResult0 = vdivq_f32( vResult0, W );
vResult1 = vdivq_f32( vResult1, W );
vResult2 = vdivq_f32( vResult2, W );
@@ -11549,7 +11549,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3ProjectStream
VH = vget_high_f32(vResult);
XMVECTOR W = vdupq_lane_f32( VH, 1 );
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
vResult = vdivq_f32( vResult, W );
#else
// 2 iterations of Newton-Raphson refinement of reciprocal for W
@@ -12096,7 +12096,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3UnprojectStream
__prefetch( pInputVector+(XM_CACHE_LINE_SIZE*5) );
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
V.val[0] = vdivq_f32( vResult0, W );
V.val[1] = vdivq_f32( vResult1, W );
V.val[2] = vdivq_f32( vResult2, W );
@@ -12146,7 +12146,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3UnprojectStream
VH = vget_high_f32(vResult);
XMVECTOR W = vdupq_lane_f32( VH, 1 );
-#ifdef _M_ARM64
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
vResult = vdivq_f32( vResult, W );
#else
// 2 iterations of Newton-Raphson refinement of reciprocal for W

View File

@@ -30,7 +30,7 @@ inline float PackedVector::XMConvertHalfToFloat
__m128i V1 = _mm_cvtsi32_si128( static_cast<uint32_t>(Value) );
__m128 V2 = _mm_cvtph_ps( V1 );
return _mm_cvtss_f32( V2 );
-#elif defined(_XM_ARM_NEON_INTRINSICS_) && defined(_M_ARM64) && !defined(_XM_NO_INTRINSICS_)
+#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)) && !defined(_XM_NO_INTRINSICS_)
uint16x4_t vHalf = vdup_n_u16(Value);
float32x4_t vFloat = vcvt_f32_f16(vreinterpret_f16_u16(vHalf));
return vgetq_lane_f32(vFloat, 0);
@@ -259,7 +259,7 @@ inline float* PackedVector::XMConvertHalfToFloatStream
XM_SFENCE();
return pOutputStream;
-#elif defined(_XM_ARM_NEON_INTRINSICS_) && defined(_M_ARM64) && !defined(_XM_NO_INTRINSICS_)
+#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)) && !defined(_XM_NO_INTRINSICS_)
const uint8_t* pHalf = reinterpret_cast<const uint8_t*>(pInputStream);
uint8_t* pFloat = reinterpret_cast<uint8_t*>(pOutputStream);
@@ -396,7 +396,7 @@ inline PackedVector::HALF PackedVector::XMConvertFloatToHalf
__m128 V1 = _mm_set_ss( Value );
__m128i V2 = _mm_cvtps_ph( V1, 0 );
return static_cast<HALF>( _mm_cvtsi128_si32(V2) );
-#elif defined(_XM_ARM_NEON_INTRINSICS_) && defined(_M_ARM64) && !defined(_XM_NO_INTRINSICS_)
+#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)) && !defined(_XM_NO_INTRINSICS_)
float32x4_t vFloat = vdupq_n_f32(Value);
float16x4_t vHalf = vcvt_f16_f32(vFloat);
return vget_lane_u16(vreinterpret_u16_f16(vHalf), 0);
@@ -621,7 +621,7 @@ inline PackedVector::HALF* PackedVector::XMConvertFloatToHalfStream
}
return pOutputStream;
-#elif defined(_XM_ARM_NEON_INTRINSICS_) && defined(_M_ARM64) && !defined(_XM_NO_INTRINSICS_)
+#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)) && !defined(_XM_NO_INTRINSICS_)
const uint8_t* pFloat = reinterpret_cast<const uint8_t*>(pInputStream);
uint8_t* pHalf = reinterpret_cast<uint8_t*>(pOutputStream);
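Usage sketch (assumed test code, not from the commit): round-tripping a value through the converters this file accelerates. On ARM64, and now on CHPE, the scalar conversions compile to single NEON half-precision conversion instructions instead of the software path:

using namespace DirectX::PackedVector;

HALF h = XMConvertFloatToHalf(1.5f);   // 1.5f is exactly representable as a half
float f = XMConvertHalfToFloat(h);     // f == 1.5f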