Properly check for and use __builtin_assume_aligned
This commit is contained in:
parent
65f9b2792c
commit
841d0bb893
@ -10,12 +10,6 @@
|
||||
#include "mixer_defs.h"
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define ASSUME_ALIGNED(ptr, ...) __builtin_assume_aligned((ptr), __VA_ARGS__)
|
||||
#else
|
||||
#define ASSUME_ALIGNED(ptr, ...) (ptr)
|
||||
#endif
|
||||
|
||||
const ALfloat *Resample_lerp32_Neon(const BsincState* UNUSED(state), const ALfloat *restrict src,
|
||||
ALuint frac, ALint increment, ALfloat *restrict dst,
|
||||
ALsizei numsamples)
|
||||
@ -248,12 +242,14 @@ const ALfloat *Resample_bsinc32_Neon(const BsincState *state, const ALfloat *res
|
||||
const float32x4_t pf4 = vdupq_n_f32(pf);
|
||||
for(j = 0;j < m;j+=4)
|
||||
{
|
||||
/* f = ((fil + sf*scd) + pf*(phd + sf*spd)) */
|
||||
const float32x4_t f4 = vmlaq_f32(vmlaq_f32(vld1q_f32(&fil[j]),
|
||||
sf4, vld1q_f32(&scd[j])),
|
||||
pf4, vmlaq_f32(vld1q_f32(&phd[j]),
|
||||
sf4, vld1q_f32(&spd[j])
|
||||
)
|
||||
);
|
||||
/* r += f*src */
|
||||
r4 = vmlaq_f32(r4, f4, vld1q_f32(&src[j]));
|
||||
}
|
||||
}
|
||||
@ -283,6 +279,9 @@ static inline void ApplyCoeffsStep(ALsizei Offset, ALfloat (*restrict Values)[2]
|
||||
leftright2 = vset_lane_f32(right, leftright2, 1);
|
||||
leftright4 = vcombine_f32(leftright2, leftright2);
|
||||
}
|
||||
Values = ASSUME_ALIGNED(Values, 16);
|
||||
Coeffs = ASSUME_ALIGNED(Coeffs, 16);
|
||||
CoeffStep = ASSUME_ALIGNED(CoeffStep, 16);
|
||||
for(c = 0;c < IrSize;c += 2)
|
||||
{
|
||||
const ALsizei o0 = (Offset+c)&HRIR_MASK;
|
||||
@ -314,6 +313,8 @@ static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2],
|
||||
leftright2 = vset_lane_f32(right, leftright2, 1);
|
||||
leftright4 = vcombine_f32(leftright2, leftright2);
|
||||
}
|
||||
Values = ASSUME_ALIGNED(Values, 16);
|
||||
Coeffs = ASSUME_ALIGNED(Coeffs, 16);
|
||||
for(c = 0;c < IrSize;c += 2)
|
||||
{
|
||||
const ALsizei o0 = (Offset+c)&HRIR_MASK;
|
||||
@ -343,6 +344,9 @@ void Mix_Neon(const ALfloat *data, ALsizei OutChans, ALfloat (*restrict OutBuffe
|
||||
float32x4_t gain4;
|
||||
ALsizei c;
|
||||
|
||||
data = ASSUME_ALIGNED(data, 16);
|
||||
OutBuffer = ASSUME_ALIGNED(OutBuffer, 16);
|
||||
|
||||
delta = (Counter > 0) ? 1.0f/(ALfloat)Counter : 0.0f;
|
||||
|
||||
for(c = 0;c < OutChans;c++)
|
||||
@ -412,6 +416,9 @@ void MixRow_Neon(ALfloat *OutBuffer, const ALfloat *Gains, const ALfloat (*restr
|
||||
float32x4_t gain4;
|
||||
ALsizei c;
|
||||
|
||||
data = ASSUME_ALIGNED(data, 16);
|
||||
OutBuffer = ASSUME_ALIGNED(OutBuffer, 16);
|
||||
|
||||
for(c = 0;c < InChans;c++)
|
||||
{
|
||||
ALsizei pos = 0;
|
||||
|
@ -12,12 +12,6 @@
|
||||
#include "mixer_defs.h"
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define ASSUME_ALIGNED(ptr, ...) __builtin_assume_aligned((ptr), __VA_ARGS__)
|
||||
#else
|
||||
#define ASSUME_ALIGNED(ptr, ...) (ptr)
|
||||
#endif
|
||||
|
||||
const ALfloat *Resample_bsinc32_SSE(const BsincState *state, const ALfloat *restrict src,
|
||||
ALuint frac, ALint increment, ALfloat *restrict dst,
|
||||
ALsizei dstlen)
|
||||
@ -52,9 +46,11 @@ const ALfloat *Resample_bsinc32_SSE(const BsincState *state, const ALfloat *rest
|
||||
#define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z))
|
||||
for(j = 0;j < m;j+=4)
|
||||
{
|
||||
/* f = ((fil + sf*scd) + pf*(phd + sf*spd)) */
|
||||
const __m128 f4 = MLA4(MLA4(LD4(&fil[j]), sf4, LD4(&scd[j])),
|
||||
pf4, MLA4(LD4(&phd[j]), sf4, LD4(&spd[j]))
|
||||
);
|
||||
/* r += f*src */
|
||||
r4 = MLA4(r4, f4, ULD4(&src[j]));
|
||||
}
|
||||
#undef MLA4
|
||||
@ -84,6 +80,9 @@ static inline void ApplyCoeffsStep(ALsizei Offset, ALfloat (*restrict Values)[2]
|
||||
__m128 vals = _mm_setzero_ps();
|
||||
ALsizei i;
|
||||
|
||||
Values = ASSUME_ALIGNED(Values, 16);
|
||||
Coeffs = ASSUME_ALIGNED(Coeffs, 16);
|
||||
CoeffStep = ASSUME_ALIGNED(CoeffStep, 16);
|
||||
if((Offset&1))
|
||||
{
|
||||
const ALsizei o0 = Offset&HRIR_MASK;
|
||||
@ -145,6 +144,8 @@ static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*restrict Values)[2],
|
||||
__m128 coeffs;
|
||||
ALsizei i;
|
||||
|
||||
Values = ASSUME_ALIGNED(Values, 16);
|
||||
Coeffs = ASSUME_ALIGNED(Coeffs, 16);
|
||||
if((Offset&1))
|
||||
{
|
||||
const ALsizei o0 = Offset&HRIR_MASK;
|
||||
|
@ -411,6 +411,19 @@ ELSE()
|
||||
SET(CMAKE_REQUIRED_FLAGS "${OLD_REQUIRED_FLAGS}")
|
||||
ENDIF()
|
||||
|
||||
CHECK_C_SOURCE_COMPILES("
|
||||
int main()
|
||||
{
|
||||
float *ptr;
|
||||
ptr = __builtin_assume_aligned(ptr, 16);
|
||||
return 0;
|
||||
}" HAVE___BUILTIN_ASSUME_ALIGNED)
|
||||
IF(HAVE___BUILTIN_ASSUME_ALIGNED)
|
||||
SET(ASSUME_ALIGNED_DECL "__builtin_assume_aligned(x, y)")
|
||||
ELSE()
|
||||
SET(ASSUME_ALIGNED_DECL "x")
|
||||
ENDIF()
|
||||
|
||||
SET(SSE_SWITCH "")
|
||||
SET(SSE2_SWITCH "")
|
||||
SET(SSE3_SWITCH "")
|
||||
|
@ -5,6 +5,9 @@
|
||||
/* Define any available alignment declaration */
|
||||
#define ALIGN(x) ${ALIGN_DECL}
|
||||
|
||||
/* Define a built-in call indicating an aligned data pointer */
|
||||
#define ASSUME_ALIGNED(x, y) ${ASSUME_ALIGNED_DECL}
|
||||
|
||||
/* Explicit hidden visibility attribute */
|
||||
#define HIDDEN_DECL ${HIDDEN_DECL}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user