Use SSE for applying the HQ B-Format decoder matrices
This commit is contained in:
parent
72d2febccb
commit
5e64882be9
@ -3,6 +3,7 @@
|
||||
|
||||
#include "bformatdec.h"
|
||||
#include "ambdec.h"
|
||||
#include "mixer_defs.h"
|
||||
#include "alu.h"
|
||||
|
||||
#include "threads.h"
|
||||
@ -151,12 +152,27 @@ static const ALfloat CubeMatrixLF[8][MAX_AMBI_COEFFS] = {
|
||||
};
|
||||
static ALfloat CubeEncoder[8][MAX_AMBI_COEFFS];
|
||||
|
||||
static alonce_flag encoder_inited = AL_ONCE_FLAG_INIT;
|
||||
|
||||
static void init_encoder(void)
|
||||
static inline MatrixMixerFunc SelectMixer(void)
|
||||
{
|
||||
#ifdef HAVE_SSE
|
||||
if((CPUCapFlags&CPU_CAP_SSE))
|
||||
return MixRow_SSE;
|
||||
#endif
|
||||
return MixRow_C;
|
||||
}
|
||||
|
||||
static MatrixMixerFunc MixMatrixRow = MixRow_C;
|
||||
|
||||
|
||||
static alonce_flag bformatdec_inited = AL_ONCE_FLAG_INIT;
|
||||
|
||||
static void init_bformatdec(void)
|
||||
{
|
||||
ALuint i, j;
|
||||
|
||||
MixMatrixRow = SelectMixer();
|
||||
|
||||
CalcXYZCoeffs(-0.577350269f, 0.577350269f, -0.577350269f, 0.0f, CubeEncoder[0]);
|
||||
CalcXYZCoeffs( 0.577350269f, 0.577350269f, -0.577350269f, 0.0f, CubeEncoder[1]);
|
||||
CalcXYZCoeffs(-0.577350269f, 0.577350269f, 0.577350269f, 0.0f, CubeEncoder[2]);
|
||||
@ -226,7 +242,7 @@ typedef struct BFormatDec {
|
||||
|
||||
BFormatDec *bformatdec_alloc()
|
||||
{
|
||||
alcall_once(&encoder_inited, init_encoder);
|
||||
alcall_once(&bformatdec_inited, init_bformatdec);
|
||||
return al_calloc(16, sizeof(BFormatDec));
|
||||
}
|
||||
|
||||
@ -435,20 +451,6 @@ void bformatdec_reset(BFormatDec *dec, const AmbDecConf *conf, ALuint chancount,
|
||||
}
|
||||
|
||||
|
||||
/* Accumulates one matrix row into `out`. For each of the `inchans` input
 * channels, the first `todo` samples of that channel are scaled by the
 * matching entry of `mtx` and added onto `out`. `out` is only added to here,
 * never cleared. Channels whose gain magnitude does not exceed
 * GAIN_SILENCE_THRESHOLD contribute nothing and are skipped.
 */
static void apply_row(ALfloat *out, const ALfloat *mtx, ALfloat (*restrict in)[BUFFERSIZE], ALuint inchans, ALuint todo)
{
    ALuint chan;

    for(chan = 0;chan < inchans;chan++)
    {
        const ALfloat scale = mtx[chan];
        ALuint n;

        /* Positive form of the silence test; a NaN gain fails the comparison
         * and is skipped as well. */
        if(fabsf(scale) > GAIN_SILENCE_THRESHOLD)
        {
            const ALfloat *src = in[chan];

            for(n = 0;n < todo;n++)
                out[n] += src[n] * scale;
        }
    }
}
|
||||
|
||||
void bformatdec_process(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint OutChannels, ALfloat (*restrict InSamples)[BUFFERSIZE], ALuint SamplesToDo)
|
||||
{
|
||||
ALuint chan, i;
|
||||
@ -465,9 +467,9 @@ void bformatdec_process(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[BU
|
||||
continue;
|
||||
|
||||
memset(dec->ChannelMix, 0, SamplesToDo*sizeof(ALfloat));
|
||||
apply_row(dec->ChannelMix, dec->MatrixHF[chan], dec->SamplesHF,
|
||||
MixMatrixRow(dec->ChannelMix, dec->MatrixHF[chan], dec->SamplesHF,
|
||||
dec->NumChannels, SamplesToDo);
|
||||
apply_row(dec->ChannelMix, dec->MatrixLF[chan], dec->SamplesLF,
|
||||
MixMatrixRow(dec->ChannelMix, dec->MatrixLF[chan], dec->SamplesLF,
|
||||
dec->NumChannels, SamplesToDo);
|
||||
|
||||
if(dec->Delay[chan].Length > 0)
|
||||
@ -504,7 +506,7 @@ void bformatdec_process(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[BU
|
||||
continue;
|
||||
|
||||
memset(dec->ChannelMix, 0, SamplesToDo*sizeof(ALfloat));
|
||||
apply_row(dec->ChannelMix, dec->MatrixHF[chan], InSamples,
|
||||
MixMatrixRow(dec->ChannelMix, dec->MatrixHF[chan], InSamples,
|
||||
dec->NumChannels, SamplesToDo);
|
||||
|
||||
if(dec->Delay[chan].Length > 0)
|
||||
@ -556,9 +558,9 @@ void bformatdec_upSample(struct BFormatDec *dec, ALfloat (*restrict OutBuffer)[B
|
||||
for(k = 0;k < dec->UpSampler.NumChannels;k++)
|
||||
{
|
||||
memset(dec->ChannelMix, 0, SamplesToDo*sizeof(ALfloat));
|
||||
apply_row(dec->ChannelMix, dec->UpSampler.MatrixHF[k], dec->SamplesHF,
|
||||
MixMatrixRow(dec->ChannelMix, dec->UpSampler.MatrixHF[k], dec->SamplesHF,
|
||||
InChannels, SamplesToDo);
|
||||
apply_row(dec->ChannelMix, dec->UpSampler.MatrixLF[k], dec->SamplesLF,
|
||||
MixMatrixRow(dec->ChannelMix, dec->UpSampler.MatrixLF[k], dec->SamplesLF,
|
||||
InChannels, SamplesToDo);
|
||||
|
||||
for(j = 0;j < dec->NumChannels;j++)
|
||||
|
@ -167,3 +167,24 @@ void Mix_C(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[B
|
||||
OutBuffer[c][OutPos+pos] += data[pos]*gain;
|
||||
}
|
||||
}
|
||||
|
||||
/* Basically the inverse of the above. Rather than one input going to multiple
|
||||
* outputs (each with its own gain), it's multiple inputs (each with its own
|
||||
* gain) going to one output. This applies one row (vs one column) of a matrix
|
||||
* transform. And as the matrices are more or less static once set up, no
|
||||
* stepping is necessary.
|
||||
*/
|
||||
void MixRow_C(ALfloat *OutBuffer, const ALfloat *Mtx, ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans, ALuint BufferSize)
|
||||
{
|
||||
ALuint c, i;
|
||||
|
||||
for(c = 0;c < InChans;c++)
|
||||
{
|
||||
ALfloat gain = Mtx[c];
|
||||
if(!(fabsf(gain) > GAIN_SILENCE_THRESHOLD))
|
||||
continue;
|
||||
|
||||
for(i = 0;i < BufferSize;i++)
|
||||
OutBuffer[i] += data[c][i] * gain;
|
||||
}
|
||||
}
|
||||
|
@ -27,6 +27,8 @@ void MixHrtf_C(ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint lidx, ALuint ri
|
||||
struct HrtfState *hrtfstate, ALuint BufferSize);
|
||||
void Mix_C(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE],
|
||||
struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize);
|
||||
void MixRow_C(ALfloat *OutBuffer, const ALfloat *Mtx, ALfloat (*restrict data)[BUFFERSIZE],
|
||||
ALuint InChans, ALuint BufferSize);
|
||||
|
||||
/* SSE mixers */
|
||||
void MixHrtf_SSE(ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint lidx, ALuint ridx,
|
||||
@ -35,6 +37,8 @@ void MixHrtf_SSE(ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint lidx, ALuint
|
||||
struct HrtfState *hrtfstate, ALuint BufferSize);
|
||||
void Mix_SSE(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE],
|
||||
struct MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize);
|
||||
void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Mtx, ALfloat (*restrict data)[BUFFERSIZE],
|
||||
ALuint InChans, ALuint BufferSize);
|
||||
|
||||
/* SSE resamplers */
|
||||
inline void InitiatePositionArrays(ALuint frac, ALuint increment, ALuint *frac_arr, ALuint *pos_arr, ALuint size)
|
||||
|
@ -260,3 +260,28 @@ void Mix_SSE(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)
|
||||
OutBuffer[c][OutPos+pos] += data[pos]*gain;
|
||||
}
|
||||
}
|
||||
|
||||
/* SSE version of the matrix-row mixer: accumulates InChans input channels,
 * each scaled by its entry in Mtx, onto OutBuffer. Samples are processed four
 * at a time, with a scalar loop for the remaining 0-3 samples. Inputs whose
 * gain magnitude does not exceed GAIN_SILENCE_THRESHOLD are skipped.
 *
 * The aligned load/store intrinsics are used, so OutBuffer and each data row
 * must be 16-byte aligned.
 */
void MixRow_SSE(ALfloat *OutBuffer, const ALfloat *Mtx, ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans, ALuint BufferSize)
{
    /* Sample count rounded down to a whole number of 4-float vectors. */
    const ALuint vec_end = BufferSize - (BufferSize&3);
    ALuint chan;

    for(chan = 0;chan < InChans;chan++)
    {
        const ALfloat coeff = Mtx[chan];
        ALuint i;

        /* A NaN coefficient fails this comparison and is skipped too. */
        if(fabsf(coeff) > GAIN_SILENCE_THRESHOLD)
        {
            const ALfloat *input = data[chan];
            const __m128 coeff4 = _mm_set1_ps(coeff);

            for(i = 0;i < vec_end;i += 4)
            {
                const __m128 scaled4 = _mm_mul_ps(_mm_load_ps(&input[i]), coeff4);
                const __m128 mixed4 = _mm_add_ps(_mm_load_ps(&OutBuffer[i]), scaled4);
                _mm_store_ps(&OutBuffer[i], mixed4);
            }
            /* Leftover samples that do not fill a whole vector. */
            for(;i < BufferSize;i++)
                OutBuffer[i] += input[i]*coeff;
        }
    }
}
|
||||
|
@ -170,6 +170,9 @@ typedef const ALfloat* (*ResamplerFunc)(const BsincState *state,
|
||||
typedef void (*MixerFunc)(const ALfloat *data, ALuint OutChans,
|
||||
ALfloat (*restrict OutBuffer)[BUFFERSIZE], struct MixGains *Gains,
|
||||
ALuint Counter, ALuint OutPos, ALuint BufferSize);
|
||||
typedef void (*MatrixMixerFunc)(ALfloat *OutBuffer, const ALfloat *Mtx,
|
||||
ALfloat (*restrict data)[BUFFERSIZE], ALuint InChans,
|
||||
ALuint BufferSize);
|
||||
typedef void (*HrtfMixerFunc)(ALfloat (*restrict OutBuffer)[BUFFERSIZE], ALuint lidx, ALuint ridx,
|
||||
const ALfloat *data, ALuint Counter, ALuint Offset, ALuint OutPos,
|
||||
const ALuint IrSize, const MixHrtfParams *hrtfparams,
|
||||
|
Loading…
Reference in New Issue
Block a user