Implement dry and wet mixers for Neon
Code provided by Philippe Simons <simons.philippe@gmail.com>.
This commit is contained in:
parent
49baa9128d
commit
a4bc0a46e9
@ -1745,8 +1745,8 @@ static ALCenum UpdateDeviceParams(ALCdevice *device, const ALCint *attrList)
|
||||
|
||||
device->UpdateSize = (ALuint64)device->UpdateSize * freq /
|
||||
device->Frequency;
|
||||
/* SSE does best with the update size being a multiple of 4 */
|
||||
if((CPUCapFlags&CPU_CAP_SSE))
|
||||
/* SSE and Neon do best with the update size being a multiple of 4 */
|
||||
if((CPUCapFlags&(CPU_CAP_SSE|CPU_CAP_NEON)) != 0)
|
||||
device->UpdateSize = (device->UpdateSize+3)&~3;
|
||||
|
||||
device->Frequency = freq;
|
||||
@ -1861,6 +1861,8 @@ static ALCenum UpdateDeviceParams(ALCdevice *device, const ALCint *attrList)
|
||||
{
|
||||
if((CPUCapFlags&CPU_CAP_SSE))
|
||||
WARN("SSE performs best with multiple of 4 update sizes (%u)\n", device->UpdateSize);
|
||||
if((CPUCapFlags&CPU_CAP_NEON))
|
||||
WARN("NEON performs best with multiple of 4 update sizes (%u)\n", device->UpdateSize);
|
||||
}
|
||||
|
||||
SetMixerFPUMode(&oldMode);
|
||||
|
@ -118,6 +118,10 @@ static DryMixerFunc SelectDirectMixer(void)
|
||||
if((CPUCapFlags&CPU_CAP_SSE))
|
||||
return MixDirect_SSE;
|
||||
#endif
|
||||
#ifdef HAVE_NEON
|
||||
if((CPUCapFlags&CPU_CAP_NEON))
|
||||
return MixDirect_Neon;
|
||||
#endif
|
||||
|
||||
return MixDirect_C;
|
||||
}
|
||||
@ -128,6 +132,10 @@ static WetMixerFunc SelectSendMixer(void)
|
||||
if((CPUCapFlags&CPU_CAP_SSE))
|
||||
return MixSend_SSE;
|
||||
#endif
|
||||
#ifdef HAVE_NEON
|
||||
if((CPUCapFlags&CPU_CAP_NEON))
|
||||
return MixSend_Neon;
|
||||
#endif
|
||||
|
||||
return MixSend_C;
|
||||
}
|
||||
|
@ -27,5 +27,7 @@ void MixSend_SSE(const struct SendParams*,const ALfloat*restrict,ALuint,ALuint,A
|
||||
|
||||
/* Neon mixers */
|
||||
void MixDirect_Hrtf_Neon(const struct DirectParams*,const ALfloat*restrict,ALuint,ALuint,ALuint,ALuint);
|
||||
void MixDirect_Neon(const struct DirectParams*,const ALfloat*restrict,ALuint,ALuint,ALuint,ALuint);
|
||||
void MixSend_Neon(const struct SendParams*,const ALfloat*restrict,ALuint,ALuint,ALuint);
|
||||
|
||||
#endif /* MIXER_DEFS_H */
|
||||
|
@ -14,11 +14,15 @@ static inline void ApplyCoeffsStep(const ALuint IrSize,
|
||||
ALfloat (*restrict Coeffs)[2],
|
||||
const ALfloat (*restrict CoeffStep)[2])
|
||||
{
|
||||
float32x4_t coeffs, deltas;
|
||||
ALuint c;
|
||||
for(c = 0;c < IrSize;c++)
|
||||
|
||||
for(c = 0;c < IrSize;c += 2)
|
||||
{
|
||||
Coeffs[c][0] += CoeffStep[c][0];
|
||||
Coeffs[c][1] += CoeffStep[c][1];
|
||||
coeffs = vld1q_f32(&Coeffs[c][0]);
|
||||
deltas = vld1q_f32(&CoeffStep[c][0]);
|
||||
coeffs = vaddq_f32(coeffs, deltas);
|
||||
vst1q_f32(&Coeffs[c][0], coeffs);
|
||||
}
|
||||
}
|
||||
|
||||
@ -54,3 +58,73 @@ static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2],
|
||||
#define SUFFIX Neon
|
||||
#include "mixer_inc.c"
|
||||
#undef SUFFIX
|
||||
|
||||
|
||||
void MixDirect_Neon(const DirectParams *params, const ALfloat *restrict data, ALuint srcchan,
|
||||
ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize)
|
||||
{
|
||||
ALfloat (*restrict OutBuffer)[BUFFERSIZE] = params->OutBuffer;
|
||||
ALfloat *restrict ClickRemoval = params->ClickRemoval;
|
||||
ALfloat *restrict PendingClicks = params->PendingClicks;
|
||||
ALfloat DrySend;
|
||||
float32x4_t gain;
|
||||
ALuint pos;
|
||||
ALuint c;
|
||||
|
||||
for(c = 0;c < MaxChannels;c++)
|
||||
{
|
||||
DrySend = params->Gains[srcchan][c];
|
||||
if(!(DrySend > GAIN_SILENCE_THRESHOLD))
|
||||
continue;
|
||||
|
||||
if(OutPos == 0)
|
||||
ClickRemoval[c] -= data[0]*DrySend;
|
||||
|
||||
gain = vdupq_n_f32(DrySend);
|
||||
for(pos = 0;BufferSize-pos > 3;pos += 4)
|
||||
{
|
||||
const float32x4_t val4 = vld1q_f32(&data[pos]);
|
||||
float32x4_t dry4 = vld1q_f32(&OutBuffer[c][OutPos+pos]);
|
||||
dry4 = vaddq_f32(dry4, vmulq_f32(val4, gain));
|
||||
vst1q_f32(&OutBuffer[c][OutPos+pos], dry4);
|
||||
}
|
||||
for(;pos < BufferSize;pos++)
|
||||
OutBuffer[c][OutPos+pos] += data[pos]*DrySend;
|
||||
|
||||
if(OutPos+pos == SamplesToDo)
|
||||
PendingClicks[c] += data[pos]*DrySend;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MixSend_Neon(const SendParams *params, const ALfloat *restrict data,
|
||||
ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize)
|
||||
{
|
||||
ALfloat (*restrict OutBuffer)[BUFFERSIZE] = params->OutBuffer;
|
||||
ALfloat *restrict ClickRemoval = params->ClickRemoval;
|
||||
ALfloat *restrict PendingClicks = params->PendingClicks;
|
||||
ALfloat WetGain;
|
||||
float32x4_t gain;
|
||||
ALuint pos;
|
||||
|
||||
WetGain = params->Gain;
|
||||
if(!(WetGain > GAIN_SILENCE_THRESHOLD))
|
||||
return;
|
||||
|
||||
if(OutPos == 0)
|
||||
ClickRemoval[0] -= data[0] * WetGain;
|
||||
|
||||
gain = vdupq_n_f32(WetGain);
|
||||
for(pos = 0;BufferSize-pos > 3;pos += 4)
|
||||
{
|
||||
const float32x4_t val4 = vld1q_f32(&data[pos]);
|
||||
float32x4_t wet4 = vld1q_f32(&OutBuffer[0][OutPos+pos]);
|
||||
wet4 = vaddq_f32(wet4, vmulq_f32(val4, gain));
|
||||
vst1q_f32(&OutBuffer[0][OutPos+pos], wet4);
|
||||
}
|
||||
for(;pos < BufferSize;pos++)
|
||||
OutBuffer[0][OutPos+pos] += data[pos] * WetGain;
|
||||
|
||||
if(OutPos+pos == SamplesToDo)
|
||||
PendingClicks[0] += data[pos] * WetGain;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user