Use an interpolated FIR filter for cubic resampling
Similar to how the bsinc filters work, but optimized for 4-point filtering. At least the SSE version is notably faster than calculating the coefficients in real time.
This commit is contained in:
parent
0de7ea42fa
commit
da845ddd9c
@ -654,6 +654,9 @@ set(CORE_OBJS
|
||||
core/converter.h
|
||||
core/cpu_caps.cpp
|
||||
core/cpu_caps.h
|
||||
core/cubic_defs.h
|
||||
core/cubic_tables.cpp
|
||||
core/cubic_tables.h
|
||||
core/devformat.cpp
|
||||
core/devformat.h
|
||||
core/device.cpp
|
||||
|
11
alc/alu.cpp
11
alc/alu.cpp
@ -55,6 +55,7 @@
|
||||
#include "core/buffer_storage.h"
|
||||
#include "core/context.h"
|
||||
#include "core/cpu_caps.h"
|
||||
#include "core/cubic_tables.h"
|
||||
#include "core/devformat.h"
|
||||
#include "core/device.h"
|
||||
#include "core/effects/base.h"
|
||||
@ -211,6 +212,14 @@ inline ResamplerFunc SelectResampler(Resampler resampler, uint increment)
|
||||
#endif
|
||||
return Resample_<LerpTag,CTag>;
|
||||
case Resampler::Cubic:
|
||||
#ifdef HAVE_NEON
|
||||
if((CPUCapFlags&CPU_CAP_NEON))
|
||||
return Resample_<CubicTag,NEONTag>;
|
||||
#endif
|
||||
#ifdef HAVE_SSE
|
||||
if((CPUCapFlags&CPU_CAP_SSE))
|
||||
return Resample_<CubicTag,SSETag>;
|
||||
#endif
|
||||
return Resample_<CubicTag,CTag>;
|
||||
case Resampler::BSinc12:
|
||||
case Resampler::BSinc24:
|
||||
@ -262,7 +271,9 @@ ResamplerFunc PrepareResampler(Resampler resampler, uint increment, InterpState
|
||||
{
|
||||
case Resampler::Point:
|
||||
case Resampler::Linear:
|
||||
break;
|
||||
case Resampler::Cubic:
|
||||
state->cubic.filter = gCubicSpline.Tab;
|
||||
break;
|
||||
case Resampler::FastBSinc12:
|
||||
case Resampler::BSinc12:
|
||||
|
13
core/cubic_defs.h
Normal file
13
core/cubic_defs.h
Normal file
@ -0,0 +1,13 @@
|
||||
#ifndef CORE_CUBIC_DEFS_H
|
||||
#define CORE_CUBIC_DEFS_H
|
||||
|
||||
/* The number of distinct phase intervals within the cubic filter tables. */
|
||||
constexpr unsigned int CubicPhaseBits{5};
|
||||
constexpr unsigned int CubicPhaseCount{1 << CubicPhaseBits};
|
||||
|
||||
struct CubicCoefficients {
|
||||
float mCoeffs[4];
|
||||
float mDeltas[4];
|
||||
};
|
||||
|
||||
#endif /* CORE_CUBIC_DEFS_H */
|
59
core/cubic_tables.cpp
Normal file
59
core/cubic_tables.cpp
Normal file
@ -0,0 +1,59 @@
|
||||
|
||||
#include "cubic_tables.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "alnumbers.h"
|
||||
#include "core/mixer/defs.h"
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
using uint = unsigned int;
|
||||
|
||||
struct SplineFilterArray {
|
||||
alignas(16) CubicCoefficients mTable[CubicPhaseCount]{};
|
||||
|
||||
constexpr SplineFilterArray()
|
||||
{
|
||||
/* Fill in the main coefficients. */
|
||||
for(size_t pi{0};pi < CubicPhaseCount;++pi)
|
||||
{
|
||||
const double mu{pi / double{CubicPhaseCount}};
|
||||
const double mu2{mu*mu}, mu3{mu2*mu};
|
||||
mTable[pi].mCoeffs[0] = static_cast<float>(-0.5*mu3 + mu2 + -0.5*mu);
|
||||
mTable[pi].mCoeffs[1] = static_cast<float>( 1.5*mu3 + -2.5*mu2 + 1.0);
|
||||
mTable[pi].mCoeffs[2] = static_cast<float>(-1.5*mu3 + 2.0*mu2 + 0.5*mu);
|
||||
mTable[pi].mCoeffs[3] = static_cast<float>( 0.5*mu3 + -0.5*mu2);
|
||||
}
|
||||
|
||||
/* Fill in the coefficient deltas. */
|
||||
for(size_t pi{0};pi < CubicPhaseCount-1;++pi)
|
||||
{
|
||||
mTable[pi].mDeltas[0] = mTable[pi+1].mCoeffs[0] - mTable[pi].mCoeffs[0];
|
||||
mTable[pi].mDeltas[1] = mTable[pi+1].mCoeffs[1] - mTable[pi].mCoeffs[1];
|
||||
mTable[pi].mDeltas[2] = mTable[pi+1].mCoeffs[2] - mTable[pi].mCoeffs[2];
|
||||
mTable[pi].mDeltas[3] = mTable[pi+1].mCoeffs[3] - mTable[pi].mCoeffs[3];
|
||||
}
|
||||
|
||||
const size_t pi{CubicPhaseCount - 1};
|
||||
mTable[pi].mDeltas[0] = -mTable[pi].mCoeffs[0];
|
||||
mTable[pi].mDeltas[1] = -mTable[pi].mCoeffs[1];
|
||||
mTable[pi].mDeltas[2] = 1.0f - mTable[pi].mCoeffs[2];
|
||||
mTable[pi].mDeltas[3] = -mTable[pi].mCoeffs[3];
|
||||
}
|
||||
|
||||
constexpr const CubicCoefficients *getTable() const noexcept { return mTable; }
|
||||
};
|
||||
|
||||
constexpr SplineFilterArray SplineFilter{};
|
||||
|
||||
} // namespace
|
||||
|
||||
const CubicTable gCubicSpline{SplineFilter.getTable()};
|
16
core/cubic_tables.h
Normal file
16
core/cubic_tables.h
Normal file
@ -0,0 +1,16 @@
|
||||
#ifndef CORE_CUBIC_TABLES_H
|
||||
#define CORE_CUBIC_TABLES_H
|
||||
|
||||
#include "cubic_defs.h"
|
||||
|
||||
|
||||
struct CubicTable {
|
||||
const CubicCoefficients *Tab;
|
||||
};
|
||||
|
||||
/* A Catmull-Rom spline. The spline passes through the center two samples,
|
||||
* ensuring no discontinuity while moving through a series of samples.
|
||||
*/
|
||||
extern const CubicTable gCubicSpline;
|
||||
|
||||
#endif /* CORE_CUBIC_TABLES_H */
|
@ -8,6 +8,7 @@
|
||||
#include "core/bufferline.h"
|
||||
#include "core/resampler_limits.h"
|
||||
|
||||
struct CubicCoefficients;
|
||||
struct HrtfChannelState;
|
||||
struct HrtfFilter;
|
||||
struct MixHrtfFilter;
|
||||
@ -51,7 +52,15 @@ struct BsincState {
|
||||
const float *filter;
|
||||
};
|
||||
|
||||
struct CubicState {
|
||||
/* Filter coefficients, and coefficient deltas. Starting at phase index 0,
|
||||
* each subsequent phase index follows contiguously.
|
||||
*/
|
||||
const CubicCoefficients *filter;
|
||||
};
|
||||
|
||||
union InterpState {
|
||||
CubicState cubic;
|
||||
BsincState bsinc;
|
||||
};
|
||||
|
||||
|
@ -5,7 +5,8 @@
|
||||
#include <limits>
|
||||
|
||||
#include "alnumeric.h"
|
||||
#include "core/bsinc_tables.h"
|
||||
#include "core/bsinc_defs.h"
|
||||
#include "core/cubic_defs.h"
|
||||
#include "defs.h"
|
||||
#include "hrtfbase.h"
|
||||
|
||||
@ -20,23 +21,39 @@ struct FastBSincTag;
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr uint FracPhaseBitDiff{MixerFracBits - BSincPhaseBits};
|
||||
constexpr uint FracPhaseDiffOne{1 << FracPhaseBitDiff};
|
||||
constexpr uint BsincPhaseBitDiff{MixerFracBits - BSincPhaseBits};
|
||||
constexpr uint BsincPhaseDiffOne{1 << BsincPhaseBitDiff};
|
||||
|
||||
constexpr uint CubicPhaseBitDiff{MixerFracBits - CubicPhaseBits};
|
||||
constexpr uint CubicPhaseDiffOne{1 << CubicPhaseBitDiff};
|
||||
constexpr uint CubicPhaseDiffMask{CubicPhaseDiffOne - 1u};
|
||||
|
||||
inline float do_point(const InterpState&, const float *RESTRICT vals, const uint)
|
||||
{ return vals[0]; }
|
||||
inline float do_lerp(const InterpState&, const float *RESTRICT vals, const uint frac)
|
||||
{ return lerpf(vals[0], vals[1], static_cast<float>(frac)*(1.0f/MixerFracOne)); }
|
||||
inline float do_cubic(const InterpState&, const float *RESTRICT vals, const uint frac)
|
||||
{ return cubic(vals[0], vals[1], vals[2], vals[3], static_cast<float>(frac)*(1.0f/MixerFracOne)); }
|
||||
inline float do_cubic(const InterpState &istate, const float *RESTRICT vals, const uint frac)
|
||||
{
|
||||
/* Calculate the phase index and factor. */
|
||||
const uint pi{frac >> CubicPhaseBitDiff};
|
||||
const float pf{static_cast<float>(frac&CubicPhaseDiffMask) * (1.0f/CubicPhaseDiffOne)};
|
||||
|
||||
const CubicCoefficients *RESTRICT filter = al::assume_aligned<16>(istate.cubic.filter + pi);
|
||||
|
||||
// Apply the phase interpolated filter.
|
||||
return (filter->mCoeffs[0] + pf*filter->mDeltas[0]) * vals[0]
|
||||
+ (filter->mCoeffs[1] + pf*filter->mDeltas[1]) * vals[1]
|
||||
+ (filter->mCoeffs[2] + pf*filter->mDeltas[2]) * vals[2]
|
||||
+ (filter->mCoeffs[3] + pf*filter->mDeltas[3]) * vals[3];
|
||||
}
|
||||
inline float do_bsinc(const InterpState &istate, const float *RESTRICT vals, const uint frac)
|
||||
{
|
||||
const size_t m{istate.bsinc.m};
|
||||
ASSUME(m > 0);
|
||||
|
||||
// Calculate the phase index and factor.
|
||||
const uint pi{frac >> FracPhaseBitDiff};
|
||||
const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
|
||||
const uint pi{frac >> BsincPhaseBitDiff};
|
||||
const float pf{static_cast<float>(frac & (BsincPhaseDiffOne-1)) * (1.0f/BsincPhaseDiffOne)};
|
||||
|
||||
const float *RESTRICT fil{istate.bsinc.filter + m*pi*2};
|
||||
const float *RESTRICT phd{fil + m};
|
||||
@ -55,8 +72,8 @@ inline float do_fastbsinc(const InterpState &istate, const float *RESTRICT vals,
|
||||
ASSUME(m > 0);
|
||||
|
||||
// Calculate the phase index and factor.
|
||||
const uint pi{frac >> FracPhaseBitDiff};
|
||||
const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
|
||||
const uint pi{frac >> BsincPhaseBitDiff};
|
||||
const float pf{static_cast<float>(frac & (BsincPhaseDiffOne-1)) * (1.0f/BsincPhaseDiffOne)};
|
||||
|
||||
const float *RESTRICT fil{istate.bsinc.filter + m*pi*2};
|
||||
const float *RESTRICT phd{fil + m};
|
||||
|
@ -7,11 +7,13 @@
|
||||
|
||||
#include "alnumeric.h"
|
||||
#include "core/bsinc_defs.h"
|
||||
#include "core/cubic_defs.h"
|
||||
#include "defs.h"
|
||||
#include "hrtfbase.h"
|
||||
|
||||
struct NEONTag;
|
||||
struct LerpTag;
|
||||
struct CubicTag;
|
||||
struct BSincTag;
|
||||
struct FastBSincTag;
|
||||
|
||||
@ -22,6 +24,14 @@ struct FastBSincTag;
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr uint BSincPhaseBitDiff{MixerFracBits - BSincPhaseBits};
|
||||
constexpr uint BSincPhaseDiffOne{1 << BSincPhaseBitDiff};
|
||||
constexpr uint BSincPhaseDiffMask{BSincPhaseDiffOne - 1u};
|
||||
|
||||
constexpr uint CubicPhaseBitDiff{MixerFracBits - CubicPhaseBits};
|
||||
constexpr uint CubicPhaseDiffOne{1 << CubicPhaseBitDiff};
|
||||
constexpr uint CubicPhaseDiffMask{CubicPhaseDiffOne - 1u};
|
||||
|
||||
inline float32x4_t set_f4(float l0, float l1, float l2, float l3)
|
||||
{
|
||||
float32x4_t ret{vmovq_n_f32(l0)};
|
||||
@ -31,9 +41,6 @@ inline float32x4_t set_f4(float l0, float l1, float l2, float l3)
|
||||
return ret;
|
||||
}
|
||||
|
||||
constexpr uint FracPhaseBitDiff{MixerFracBits - BSincPhaseBits};
|
||||
constexpr uint FracPhaseDiffOne{1 << FracPhaseBitDiff};
|
||||
|
||||
inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const ConstHrirSpan Coeffs,
|
||||
const float left, const float right)
|
||||
{
|
||||
@ -183,6 +190,37 @@ float *Resample_<LerpTag,NEONTag>(const InterpState*, float *RESTRICT src, uint
|
||||
return dst.data();
|
||||
}
|
||||
|
||||
template<>
|
||||
float *Resample_<CubicTag,NEONTag>(const InterpState *state, float *RESTRICT src, uint frac,
|
||||
uint increment, const al::span<float> dst)
|
||||
{
|
||||
const CubicCoefficients *RESTRICT filter = al::assume_aligned<16>(state->cubic.filter);
|
||||
|
||||
src -= 1;
|
||||
for(float &out_sample : dst)
|
||||
{
|
||||
const uint pi{frac >> CubicPhaseBitDiff};
|
||||
const float pf{static_cast<float>(frac&CubicPhaseDiffMask) * (1.0f/CubicPhaseDiffOne)};
|
||||
const float32x4_t pf4{vdupq_n_f32(pf)};
|
||||
|
||||
/* Apply the phase interpolated filter. */
|
||||
|
||||
/* f = fil + pf*phd */
|
||||
const float32x4_t f4 = vmlaq_f32(vld1q_f32(filter[pi].mCoeffs), pf4,
|
||||
vld1q_f32(filter[pi].mDeltas));
|
||||
/* r = f*src */
|
||||
float32x4_t r4{vmulq_f32(f4, vld1q_f32(src))};
|
||||
|
||||
r4 = vaddq_f32(r4, vrev64q_f32(r4));
|
||||
out_sample = vget_lane_f32(vadd_f32(vget_low_f32(r4), vget_high_f32(r4)), 0);
|
||||
|
||||
frac += increment;
|
||||
src += frac>>MixerFracBits;
|
||||
frac &= MixerFracMask;
|
||||
}
|
||||
return dst.data();
|
||||
}
|
||||
|
||||
template<>
|
||||
float *Resample_<BSincTag,NEONTag>(const InterpState *state, float *RESTRICT src, uint frac,
|
||||
uint increment, const al::span<float> dst)
|
||||
@ -196,8 +234,8 @@ float *Resample_<BSincTag,NEONTag>(const InterpState *state, float *RESTRICT src
|
||||
for(float &out_sample : dst)
|
||||
{
|
||||
// Calculate the phase index and factor.
|
||||
const uint pi{frac >> FracPhaseBitDiff};
|
||||
const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
|
||||
const uint pi{frac >> BSincPhaseBitDiff};
|
||||
const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)};
|
||||
|
||||
// Apply the scale and phase interpolated filter.
|
||||
float32x4_t r4{vdupq_n_f32(0.0f)};
|
||||
@ -242,8 +280,8 @@ float *Resample_<FastBSincTag,NEONTag>(const InterpState *state, float *RESTRICT
|
||||
for(float &out_sample : dst)
|
||||
{
|
||||
// Calculate the phase index and factor.
|
||||
const uint pi{frac >> FracPhaseBitDiff};
|
||||
const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
|
||||
const uint pi{frac >> BSincPhaseBitDiff};
|
||||
const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)};
|
||||
|
||||
// Apply the phase interpolated filter.
|
||||
float32x4_t r4{vdupq_n_f32(0.0f)};
|
||||
|
@ -7,10 +7,12 @@
|
||||
|
||||
#include "alnumeric.h"
|
||||
#include "core/bsinc_defs.h"
|
||||
#include "core/cubic_defs.h"
|
||||
#include "defs.h"
|
||||
#include "hrtfbase.h"
|
||||
|
||||
struct SSETag;
|
||||
struct CubicTag;
|
||||
struct BSincTag;
|
||||
struct FastBSincTag;
|
||||
|
||||
@ -21,8 +23,13 @@ struct FastBSincTag;
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr uint FracPhaseBitDiff{MixerFracBits - BSincPhaseBits};
|
||||
constexpr uint FracPhaseDiffOne{1 << FracPhaseBitDiff};
|
||||
constexpr uint BSincPhaseBitDiff{MixerFracBits - BSincPhaseBits};
|
||||
constexpr uint BSincPhaseDiffOne{1 << BSincPhaseBitDiff};
|
||||
constexpr uint BSincPhaseDiffMask{BSincPhaseDiffOne - 1u};
|
||||
|
||||
constexpr uint CubicPhaseBitDiff{MixerFracBits - CubicPhaseBits};
|
||||
constexpr uint CubicPhaseDiffOne{1 << CubicPhaseBitDiff};
|
||||
constexpr uint CubicPhaseDiffMask{CubicPhaseDiffOne - 1u};
|
||||
|
||||
#define MLA4(x, y, z) _mm_add_ps(x, _mm_mul_ps(y, z))
|
||||
|
||||
@ -146,6 +153,38 @@ force_inline void MixLine(const al::span<const float> InSamples, float *RESTRICT
|
||||
|
||||
} // namespace
|
||||
|
||||
template<>
|
||||
float *Resample_<CubicTag,SSETag>(const InterpState *state, float *RESTRICT src, uint frac,
|
||||
uint increment, const al::span<float> dst)
|
||||
{
|
||||
const CubicCoefficients *RESTRICT filter = al::assume_aligned<16>(state->cubic.filter);
|
||||
|
||||
src -= 1;
|
||||
for(float &out_sample : dst)
|
||||
{
|
||||
const uint pi{frac >> CubicPhaseBitDiff};
|
||||
const float pf{static_cast<float>(frac&CubicPhaseDiffMask) * (1.0f/CubicPhaseDiffOne)};
|
||||
const __m128 pf4{_mm_set1_ps(pf)};
|
||||
|
||||
/* Apply the phase interpolated filter. */
|
||||
|
||||
/* f = fil + pf*phd */
|
||||
const __m128 f4 = MLA4(_mm_load_ps(filter[pi].mCoeffs), pf4,
|
||||
_mm_load_ps(filter[pi].mDeltas));
|
||||
/* r = f*src */
|
||||
__m128 r4{_mm_mul_ps(f4, _mm_loadu_ps(src))};
|
||||
|
||||
r4 = _mm_add_ps(r4, _mm_shuffle_ps(r4, r4, _MM_SHUFFLE(0, 1, 2, 3)));
|
||||
r4 = _mm_add_ps(r4, _mm_movehl_ps(r4, r4));
|
||||
out_sample = _mm_cvtss_f32(r4);
|
||||
|
||||
frac += increment;
|
||||
src += frac>>MixerFracBits;
|
||||
frac &= MixerFracMask;
|
||||
}
|
||||
return dst.data();
|
||||
}
|
||||
|
||||
template<>
|
||||
float *Resample_<BSincTag,SSETag>(const InterpState *state, float *RESTRICT src, uint frac,
|
||||
uint increment, const al::span<float> dst)
|
||||
@ -159,8 +198,8 @@ float *Resample_<BSincTag,SSETag>(const InterpState *state, float *RESTRICT src,
|
||||
for(float &out_sample : dst)
|
||||
{
|
||||
// Calculate the phase index and factor.
|
||||
const uint pi{frac >> FracPhaseBitDiff};
|
||||
const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
|
||||
const uint pi{frac >> BSincPhaseBitDiff};
|
||||
const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)};
|
||||
|
||||
// Apply the scale and phase interpolated filter.
|
||||
__m128 r4{_mm_setzero_ps()};
|
||||
@ -206,8 +245,8 @@ float *Resample_<FastBSincTag,SSETag>(const InterpState *state, float *RESTRICT
|
||||
for(float &out_sample : dst)
|
||||
{
|
||||
// Calculate the phase index and factor.
|
||||
const uint pi{frac >> FracPhaseBitDiff};
|
||||
const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
|
||||
const uint pi{frac >> BSincPhaseBitDiff};
|
||||
const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)};
|
||||
|
||||
// Apply the phase interpolated filter.
|
||||
__m128 r4{_mm_setzero_ps()};
|
||||
|
Loading…
Reference in New Issue
Block a user