2016-08-22 18:43:47 +00:00
//--------------------------------------------------------------------------------------
// File: XDSP.h
//
// DirectXMath based Digital Signal Processing (DSP) functions for audio,
// primarily Fast Fourier Transform (FFT)
//
// All buffer parameters must be 16-byte aligned
//
// All FFT functions support only single-precision floating-point audio
//
2021-02-27 07:17:26 +00:00
// Copyright (c) Microsoft Corporation.
2018-02-24 07:16:27 +00:00
// Licensed under the MIT License.
2016-08-22 18:43:47 +00:00
//
// http://go.microsoft.com/fwlink/?LinkID=615557
//--------------------------------------------------------------------------------------
# pragma once
2020-03-26 18:24:50 +00:00
# include <cassert>
2019-07-31 19:26:12 +00:00
# include <DirectXMath.h>
2016-08-22 18:43:47 +00:00
2020-03-26 18:24:50 +00:00
# include <cstdint>
# include <cstring>
2019-07-31 19:26:12 +00:00
2021-11-29 21:08:15 +00:00
# ifdef _MSC_VER
2016-08-22 18:43:47 +00:00
# pragma warning(push)
2019-12-12 04:16:59 +00:00
# pragma warning(disable: 6001 6262)
2021-11-29 21:08:15 +00:00
# endif
2016-08-22 18:43:47 +00:00
2023-05-05 20:22:46 +00:00
# ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wunknown-warning-option"
# pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
# endif
2016-08-22 18:43:47 +00:00
namespace XDSP
{
2019-07-31 19:26:12 +00:00
using XMVECTOR = DirectX : : XMVECTOR ;
using FXMVECTOR = DirectX : : FXMVECTOR ;
using GXMVECTOR = DirectX : : GXMVECTOR ;
using CXMVECTOR = DirectX : : CXMVECTOR ;
2021-07-14 21:01:11 +00:00
using XMFLOAT4A = DirectX : : XMFLOAT4A ;
2016-08-22 18:43:47 +00:00
2021-07-14 21:01:11 +00:00
inline bool ISPOWEROF2 ( size_t n ) { return ( ( ( n ) & ( ( n ) - 1 ) ) = = 0 & & ( n ) ! = 0 ) ; }
2016-08-22 18:43:47 +00:00
// Parallel multiplication of four complex numbers, assuming real and imaginary values are stored in separate vectors.
2019-07-31 19:26:12 +00:00
inline void XM_CALLCONV vmulComplex (
_Out_ XMVECTOR & rResult , _Out_ XMVECTOR & iResult ,
2019-12-12 04:16:59 +00:00
_In_ FXMVECTOR r1 , _In_ FXMVECTOR i1 , _In_ FXMVECTOR r2 , _In_ GXMVECTOR i2 ) noexcept
2016-08-22 18:43:47 +00:00
{
using namespace DirectX ;
// (r1, i1) * (r2, i2) = (r1r2 - i1i2, r1i2 + r2i1)
2021-07-14 21:01:11 +00:00
const XMVECTOR vr1r2 = XMVectorMultiply ( r1 , r2 ) ;
const XMVECTOR vr1i2 = XMVectorMultiply ( r1 , i2 ) ;
2019-07-31 19:26:12 +00:00
rResult = XMVectorNegativeMultiplySubtract ( i1 , i2 , vr1r2 ) ; // real: (r1*r2 - i1*i2)
iResult = XMVectorMultiplyAdd ( r2 , i1 , vr1i2 ) ; // imaginary: (r1*i2 + r2*i1)
}
2016-08-22 18:43:47 +00:00
2019-07-31 19:26:12 +00:00
inline void XM_CALLCONV vmulComplex (
2019-12-12 04:16:59 +00:00
_Inout_ XMVECTOR & r1 , _Inout_ XMVECTOR & i1 , _In_ FXMVECTOR r2 , _In_ FXMVECTOR i2 ) noexcept
2016-08-22 18:43:47 +00:00
{
using namespace DirectX ;
// (r1, i1) * (r2, i2) = (r1r2 - i1i2, r1i2 + r2i1)
2021-07-14 21:01:11 +00:00
const XMVECTOR vr1r2 = XMVectorMultiply ( r1 , r2 ) ;
const XMVECTOR vr1i2 = XMVectorMultiply ( r1 , i2 ) ;
2019-07-31 19:26:12 +00:00
r1 = XMVectorNegativeMultiplySubtract ( i1 , i2 , vr1r2 ) ; // real: (r1*r2 - i1*i2)
i1 = XMVectorMultiplyAdd ( r2 , i1 , vr1i2 ) ; // imaginary: (r1*i2 + r2*i1)
}
2016-08-22 18:43:47 +00:00
//----------------------------------------------------------------------------------
// Radix-4 decimation-in-time FFT butterfly.
// This version assumes that all four elements of the butterfly are
// adjacent in a single vector.
//
// Compute the product of the complex input vector and the
// 4-element DFT matrix:
// | 1 1 1 1 | | (r1X,i1X) |
// | 1 -j -1 j | | (r1Y,i1Y) |
// | 1 -1 1 -1 | | (r1Z,i1Z) |
// | 1 j -1 -j | | (r1W,i1W) |
//
// This matrix can be decomposed into two simpler ones to reduce the
// number of additions needed. The decomposed matrices look like this:
// | 1 0 1 0 | | 1 0 1 0 |
// | 0 1 0 -j | | 1 0 -1 0 |
// | 1 0 -1 0 | | 0 1 0 1 |
// | 0 1 0 j | | 0 1 0 -1 |
//
// Combine as follows:
// | 1 0 1 0 | | (r1X,i1X) | | (r1X + r1Z, i1X + i1Z) |
// Temp = | 1 0 -1 0 | * | (r1Y,i1Y) | = | (r1X - r1Z, i1X - i1Z) |
// | 0 1 0 1 | | (r1Z,i1Z) | | (r1Y + r1W, i1Y + i1W) |
// | 0 1 0 -1 | | (r1W,i1W) | | (r1Y - r1W, i1Y - i1W) |
//
// | 1 0 1 0 | | (rTempX,iTempX) | | (rTempX + rTempZ, iTempX + iTempZ) |
// Result = | 0 1 0 -j | * | (rTempY,iTempY) | = | (rTempY + iTempW, iTempY - rTempW) |
// | 1 0 -1 0 | | (rTempZ,iTempZ) | | (rTempX - rTempZ, iTempX - iTempZ) |
// | 0 1 0 j | | (rTempW,iTempW) | | (rTempY - iTempW, iTempY + rTempW) |
//----------------------------------------------------------------------------------
2019-12-12 04:16:59 +00:00
inline void ButterflyDIT4_1 ( _Inout_ XMVECTOR & r1 , _Inout_ XMVECTOR & i1 ) noexcept
2016-08-22 18:43:47 +00:00
{
using namespace DirectX ;
// sign constants for radix-4 butterflies
2021-07-14 21:01:11 +00:00
static const XMVECTORF32 vDFT4SignBits1 = { { { 1.0f , - 1.0f , 1.0f , - 1.0f } } } ;
static const XMVECTORF32 vDFT4SignBits2 = { { { 1.0f , 1.0f , - 1.0f , - 1.0f } } } ;
static const XMVECTORF32 vDFT4SignBits3 = { { { 1.0f , - 1.0f , - 1.0f , 1.0f } } } ;
2016-08-22 18:43:47 +00:00
// calculating Temp
// [r1X| r1X|r1Y| r1Y] + [r1Z|-r1Z|r1W|-r1W]
// [i1X| i1X|i1Y| i1Y] + [i1Z|-i1Z|i1W|-i1W]
2021-07-14 21:01:11 +00:00
const XMVECTOR r1L = XMVectorSwizzle < 0 , 0 , 1 , 1 > ( r1 ) ;
const XMVECTOR r1H = XMVectorSwizzle < 2 , 2 , 3 , 3 > ( r1 ) ;
2016-08-22 18:43:47 +00:00
2021-07-14 21:01:11 +00:00
const XMVECTOR i1L = XMVectorSwizzle < 0 , 0 , 1 , 1 > ( i1 ) ;
const XMVECTOR i1H = XMVectorSwizzle < 2 , 2 , 3 , 3 > ( i1 ) ;
2016-08-22 18:43:47 +00:00
2021-07-14 21:01:11 +00:00
const XMVECTOR rTemp = XMVectorMultiplyAdd ( r1H , vDFT4SignBits1 , r1L ) ;
const XMVECTOR iTemp = XMVectorMultiplyAdd ( i1H , vDFT4SignBits1 , i1L ) ;
2016-08-22 18:43:47 +00:00
// calculating Result
2021-07-14 21:01:11 +00:00
const XMVECTOR rZrWiZiW = XMVectorPermute < 2 , 3 , 6 , 7 > ( rTemp , iTemp ) ; // [rTempZ|rTempW|iTempZ|iTempW]
const XMVECTOR rZiWrZiW = XMVectorSwizzle < 0 , 3 , 0 , 3 > ( rZrWiZiW ) ; // [rTempZ|iTempW|rTempZ|iTempW]
const XMVECTOR iZrWiZrW = XMVectorSwizzle < 2 , 1 , 2 , 1 > ( rZrWiZiW ) ; // [rTempZ|iTempW|rTempZ|iTempW]
2016-08-22 18:43:47 +00:00
// [rTempX| rTempY| rTempX| rTempY] + [rTempZ| iTempW|-rTempZ|-iTempW]
// [iTempX| iTempY| iTempX| iTempY] + // [iTempZ|-rTempW|-iTempZ| rTempW]
2021-07-14 21:01:11 +00:00
const XMVECTOR rTempL = XMVectorSwizzle < 0 , 1 , 0 , 1 > ( rTemp ) ;
const XMVECTOR iTempL = XMVectorSwizzle < 0 , 1 , 0 , 1 > ( iTemp ) ;
2016-08-22 18:43:47 +00:00
2021-07-14 21:01:11 +00:00
r1 = XMVectorMultiplyAdd ( rZiWrZiW , vDFT4SignBits2 , rTempL ) ;
i1 = XMVectorMultiplyAdd ( iZrWiZrW , vDFT4SignBits3 , iTempL ) ;
2016-08-22 18:43:47 +00:00
}
//----------------------------------------------------------------------------------
// Radix-4 decimation-in-time FFT butterfly.
// This version assumes that elements of the butterfly are
// in different vectors, so that each vector in the input
// contains elements from four different butterflies.
// The four separate butterflies are processed in parallel.
//
// The calculations here are the same as the ones in the single-vector
// radix-4 DFT, but instead of being done on a single vector (X,Y,Z,W)
// they are done in parallel on sixteen independent complex values.
// There is no interdependence between the vector elements:
// | 1 0 1 0 | | (rIn0,iIn0) | | (rIn0 + rIn2, iIn0 + iIn2) |
// | 1 0 -1 0 | * | (rIn1,iIn1) | = Temp = | (rIn0 - rIn2, iIn0 - iIn2) |
// | 0 1 0 1 | | (rIn2,iIn2) | | (rIn1 + rIn3, iIn1 + iIn3) |
// | 0 1 0 -1 | | (rIn3,iIn3) | | (rIn1 - rIn3, iIn1 - iIn3) |
//
// | 1 0 1 0 | | (rTemp0,iTemp0) | | (rTemp0 + rTemp2, iTemp0 + iTemp2) |
// Result = | 0 1 0 -j | * | (rTemp1,iTemp1) | = | (rTemp1 + iTemp3, iTemp1 - rTemp3) |
// | 1 0 -1 0 | | (rTemp2,iTemp2) | | (rTemp0 - rTemp2, iTemp0 - iTemp2) |
// | 0 1 0 j | | (rTemp3,iTemp3) | | (rTemp1 - iTemp3, iTemp1 + rTemp3) |
//----------------------------------------------------------------------------------
2019-07-31 19:26:12 +00:00
inline void ButterflyDIT4_4 (
_Inout_ XMVECTOR & r0 ,
_Inout_ XMVECTOR & r1 ,
_Inout_ XMVECTOR & r2 ,
_Inout_ XMVECTOR & r3 ,
_Inout_ XMVECTOR & i0 ,
_Inout_ XMVECTOR & i1 ,
_Inout_ XMVECTOR & i2 ,
_Inout_ XMVECTOR & i3 ,
_In_reads_ ( uStride * 4 ) const XMVECTOR * __restrict pUnityTableReal ,
_In_reads_ ( uStride * 4 ) const XMVECTOR * __restrict pUnityTableImaginary ,
_In_ size_t uStride ,
2019-12-12 04:16:59 +00:00
_In_ const bool fLast ) noexcept
2016-08-22 18:43:47 +00:00
{
using namespace DirectX ;
assert ( pUnityTableReal ) ;
assert ( pUnityTableImaginary ) ;
2019-07-31 19:26:12 +00:00
assert ( reinterpret_cast < uintptr_t > ( pUnityTableReal ) % 16 = = 0 ) ;
assert ( reinterpret_cast < uintptr_t > ( pUnityTableImaginary ) % 16 = = 0 ) ;
2016-08-22 18:43:47 +00:00
assert ( ISPOWEROF2 ( uStride ) ) ;
// calculating Temp
2021-07-14 21:01:11 +00:00
const XMVECTOR rTemp0 = XMVectorAdd ( r0 , r2 ) ;
const XMVECTOR iTemp0 = XMVectorAdd ( i0 , i2 ) ;
2016-08-22 18:43:47 +00:00
2021-07-14 21:01:11 +00:00
const XMVECTOR rTemp2 = XMVectorAdd ( r1 , r3 ) ;
const XMVECTOR iTemp2 = XMVectorAdd ( i1 , i3 ) ;
2016-08-22 18:43:47 +00:00
2021-07-14 21:01:11 +00:00
const XMVECTOR rTemp1 = XMVectorSubtract ( r0 , r2 ) ;
const XMVECTOR iTemp1 = XMVectorSubtract ( i0 , i2 ) ;
2016-08-22 18:43:47 +00:00
2021-07-14 21:01:11 +00:00
const XMVECTOR rTemp3 = XMVectorSubtract ( r1 , r3 ) ;
const XMVECTOR iTemp3 = XMVectorSubtract ( i1 , i3 ) ;
2016-08-22 18:43:47 +00:00
2019-07-31 19:26:12 +00:00
XMVECTOR rTemp4 = XMVectorAdd ( rTemp0 , rTemp2 ) ;
2016-08-22 18:43:47 +00:00
XMVECTOR iTemp4 = XMVectorAdd ( iTemp0 , iTemp2 ) ;
2019-07-31 19:26:12 +00:00
XMVECTOR rTemp5 = XMVectorAdd ( rTemp1 , iTemp3 ) ;
2016-08-22 18:43:47 +00:00
XMVECTOR iTemp5 = XMVectorSubtract ( iTemp1 , rTemp3 ) ;
XMVECTOR rTemp6 = XMVectorSubtract ( rTemp0 , rTemp2 ) ;
XMVECTOR iTemp6 = XMVectorSubtract ( iTemp0 , iTemp2 ) ;
XMVECTOR rTemp7 = XMVectorSubtract ( rTemp1 , iTemp3 ) ;
XMVECTOR iTemp7 = XMVectorAdd ( iTemp1 , rTemp3 ) ;
// calculating Result
// vmulComplex(rTemp0, iTemp0, rTemp0, iTemp0, pUnityTableReal[0], pUnityTableImaginary[0]); // first one is always trivial
vmulComplex ( rTemp5 , iTemp5 , pUnityTableReal [ uStride ] , pUnityTableImaginary [ uStride ] ) ;
2019-07-31 19:26:12 +00:00
vmulComplex ( rTemp6 , iTemp6 , pUnityTableReal [ uStride * 2 ] , pUnityTableImaginary [ uStride * 2 ] ) ;
vmulComplex ( rTemp7 , iTemp7 , pUnityTableReal [ uStride * 3 ] , pUnityTableImaginary [ uStride * 3 ] ) ;
2016-08-22 18:43:47 +00:00
if ( fLast )
{
ButterflyDIT4_1 ( rTemp4 , iTemp4 ) ;
ButterflyDIT4_1 ( rTemp5 , iTemp5 ) ;
ButterflyDIT4_1 ( rTemp6 , iTemp6 ) ;
ButterflyDIT4_1 ( rTemp7 , iTemp7 ) ;
}
r0 = rTemp4 ; i0 = iTemp4 ;
r1 = rTemp5 ; i1 = iTemp5 ;
r2 = rTemp6 ; i2 = iTemp6 ;
r3 = rTemp7 ; i3 = iTemp7 ;
}
//==================================================================================
// F-U-N-C-T-I-O-N-S
//==================================================================================
//----------------------------------------------------------------------------------
// DESCRIPTION:
// 4-sample FFT.
//
// PARAMETERS:
// pReal - [inout] real components, must have at least uCount elements
// pImaginary - [inout] imaginary components, must have at least uCount elements
// uCount - [in] number of FFT iterations
//----------------------------------------------------------------------------------
2019-07-31 19:26:12 +00:00
inline void FFT4 (
_Inout_updates_ ( uCount ) XMVECTOR * __restrict pReal ,
_Inout_updates_ ( uCount ) XMVECTOR * __restrict pImaginary ,
2019-12-12 04:16:59 +00:00
const size_t uCount = 1 ) noexcept
2016-08-22 18:43:47 +00:00
{
assert ( pReal ) ;
assert ( pImaginary ) ;
2019-07-31 19:26:12 +00:00
assert ( reinterpret_cast < uintptr_t > ( pReal ) % 16 = = 0 ) ;
assert ( reinterpret_cast < uintptr_t > ( pImaginary ) % 16 = = 0 ) ;
2016-08-22 18:43:47 +00:00
assert ( ISPOWEROF2 ( uCount ) ) ;
2019-07-31 19:26:12 +00:00
for ( size_t uIndex = 0 ; uIndex < uCount ; + + uIndex )
2016-08-22 18:43:47 +00:00
{
ButterflyDIT4_1 ( pReal [ uIndex ] , pImaginary [ uIndex ] ) ;
}
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
// 8-sample FFT.
//
// PARAMETERS:
// pReal - [inout] real components, must have at least uCount*2 elements
// pImaginary - [inout] imaginary components, must have at least uCount*2 elements
// uCount - [in] number of FFT iterations
//----------------------------------------------------------------------------------
2019-07-31 19:26:12 +00:00
inline void FFT8 (
_Inout_updates_ ( uCount * 2 ) XMVECTOR * __restrict pReal ,
_Inout_updates_ ( uCount * 2 ) XMVECTOR * __restrict pImaginary ,
2019-12-12 04:16:59 +00:00
_In_ const size_t uCount = 1 ) noexcept
2016-08-22 18:43:47 +00:00
{
using namespace DirectX ;
assert ( pReal ) ;
assert ( pImaginary ) ;
2019-07-31 19:26:12 +00:00
assert ( reinterpret_cast < uintptr_t > ( pReal ) % 16 = = 0 ) ;
assert ( reinterpret_cast < uintptr_t > ( pImaginary ) % 16 = = 0 ) ;
2016-08-22 18:43:47 +00:00
assert ( ISPOWEROF2 ( uCount ) ) ;
2021-07-14 21:01:11 +00:00
static const XMVECTORF32 wr1 = { { { 1.0f , 0.70710677f , 0.0f , - 0.70710677f } } } ;
2019-07-31 19:26:12 +00:00
static const XMVECTORF32 wi1 = { { { 0.0f , - 0.70710677f , - 1.0f , - 0.70710677f } } } ;
2021-07-14 21:01:11 +00:00
static const XMVECTORF32 wr2 = { { { - 1.0f , - 0.70710677f , 0.0f , 0.70710677f } } } ;
static const XMVECTORF32 wi2 = { { { 0.0f , 0.70710677f , 1.0f , 0.70710677f } } } ;
2016-08-22 18:43:47 +00:00
2019-07-31 19:26:12 +00:00
for ( size_t uIndex = 0 ; uIndex < uCount ; + + uIndex )
2016-08-22 18:43:47 +00:00
{
2019-07-31 19:26:12 +00:00
XMVECTOR * __restrict pR = pReal + uIndex * 2 ;
XMVECTOR * __restrict pI = pImaginary + uIndex * 2 ;
2016-08-22 18:43:47 +00:00
2019-07-31 19:26:12 +00:00
XMVECTOR oddsR = XMVectorPermute < 1 , 3 , 5 , 7 > ( pR [ 0 ] , pR [ 1 ] ) ;
XMVECTOR evensR = XMVectorPermute < 0 , 2 , 4 , 6 > ( pR [ 0 ] , pR [ 1 ] ) ;
XMVECTOR oddsI = XMVectorPermute < 1 , 3 , 5 , 7 > ( pI [ 0 ] , pI [ 1 ] ) ;
XMVECTOR evensI = XMVectorPermute < 0 , 2 , 4 , 6 > ( pI [ 0 ] , pI [ 1 ] ) ;
2016-08-22 18:43:47 +00:00
ButterflyDIT4_1 ( oddsR , oddsI ) ;
ButterflyDIT4_1 ( evensR , evensI ) ;
XMVECTOR r , i ;
vmulComplex ( r , i , oddsR , oddsI , wr1 , wi1 ) ;
pR [ 0 ] = XMVectorAdd ( evensR , r ) ;
pI [ 0 ] = XMVectorAdd ( evensI , i ) ;
vmulComplex ( r , i , oddsR , oddsI , wr2 , wi2 ) ;
pR [ 1 ] = XMVectorAdd ( evensR , r ) ;
pI [ 1 ] = XMVectorAdd ( evensI , i ) ;
}
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
// 16-sample FFT.
//
// PARAMETERS:
// pReal - [inout] real components, must have at least uCount*4 elements
// pImaginary - [inout] imaginary components, must have at least uCount*4 elements
// uCount - [in] number of FFT iterations
//----------------------------------------------------------------------------------
2019-07-31 19:26:12 +00:00
inline void FFT16 (
_Inout_updates_ ( uCount * 4 ) XMVECTOR * __restrict pReal ,
_Inout_updates_ ( uCount * 4 ) XMVECTOR * __restrict pImaginary ,
2019-12-12 04:16:59 +00:00
_In_ const size_t uCount = 1 ) noexcept
2016-08-22 18:43:47 +00:00
{
using namespace DirectX ;
assert ( pReal ) ;
assert ( pImaginary ) ;
2019-07-31 19:26:12 +00:00
assert ( reinterpret_cast < uintptr_t > ( pReal ) % 16 = = 0 ) ;
assert ( reinterpret_cast < uintptr_t > ( pImaginary ) % 16 = = 0 ) ;
2016-08-22 18:43:47 +00:00
assert ( ISPOWEROF2 ( uCount ) ) ;
2019-07-31 19:26:12 +00:00
static const XMVECTORF32 aUnityTableReal [ 4 ] = {
{ { { 1.0f , 1.0f , 1.0f , 1.0f } } } ,
{ { { 1.0f , 0.92387950f , 0.70710677f , 0.38268343f } } } ,
{ { { 1.0f , 0.70710677f , - 4.3711388e-008 f , - 0.70710677f } } } ,
{ { { 1.0f , 0.38268343f , - 0.70710677f , - 0.92387950f } } }
} ;
static const XMVECTORF32 aUnityTableImaginary [ 4 ] =
{
{ { { - 0.0f , - 0.0f , - 0.0f , - 0.0f } } } ,
{ { { - 0.0f , - 0.38268343f , - 0.70710677f , - 0.92387950f } } } ,
{ { { - 0.0f , - 0.70710677f , - 1.0f , - 0.70710677f } } } ,
{ { { - 0.0f , - 0.92387950f , - 0.70710677f , 0.38268343f } } }
} ;
2016-08-22 18:43:47 +00:00
2019-07-31 19:26:12 +00:00
for ( size_t uIndex = 0 ; uIndex < uCount ; + + uIndex )
2016-08-22 18:43:47 +00:00
{
2019-07-31 19:26:12 +00:00
ButterflyDIT4_4 ( pReal [ uIndex * 4 ] ,
pReal [ uIndex * 4 + 1 ] ,
pReal [ uIndex * 4 + 2 ] ,
pReal [ uIndex * 4 + 3 ] ,
pImaginary [ uIndex * 4 ] ,
pImaginary [ uIndex * 4 + 1 ] ,
pImaginary [ uIndex * 4 + 2 ] ,
pImaginary [ uIndex * 4 + 3 ] ,
reinterpret_cast < const XMVECTOR * > ( aUnityTableReal ) ,
reinterpret_cast < const XMVECTOR * > ( aUnityTableImaginary ) ,
1 , true ) ;
2016-08-22 18:43:47 +00:00
}
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
// 2^N-sample FFT.
//
// REMARKS:
// For FFTs length 16 and below, call FFT16(), FFT8(), or FFT4().
//
// PARAMETERS:
// pReal - [inout] real components, must have at least (uLength*uCount)/4 elements
// pImaginary - [inout] imaginary components, must have at least (uLength*uCount)/4 elements
// pUnityTable - [in] unity table, must have at least uLength*uCount elements, see FFTInitializeUnityTable()
// uLength - [in] FFT length in samples, must be a power of 2 > 16
// uCount - [in] number of FFT iterations
//----------------------------------------------------------------------------------
2019-12-12 04:16:59 +00:00
inline void FFT (
2021-07-14 21:01:11 +00:00
_Inout_updates_ ( ( uLength * uCount ) / 4 ) XMVECTOR * __restrict pReal ,
_Inout_updates_ ( ( uLength * uCount ) / 4 ) XMVECTOR * __restrict pImaginary ,
_In_reads_ ( uLength * uCount ) const XMVECTOR * __restrict pUnityTable ,
2019-12-12 04:16:59 +00:00
_In_ const size_t uLength ,
2021-07-14 21:01:11 +00:00
_In_ const size_t uCount = 1 ) noexcept
2016-08-22 18:43:47 +00:00
{
assert ( pReal ) ;
assert ( pImaginary ) ;
assert ( pUnityTable ) ;
2019-07-31 19:26:12 +00:00
assert ( reinterpret_cast < uintptr_t > ( pReal ) % 16 = = 0 ) ;
assert ( reinterpret_cast < uintptr_t > ( pImaginary ) % 16 = = 0 ) ;
assert ( reinterpret_cast < uintptr_t > ( pUnityTable ) % 16 = = 0 ) ;
2016-08-22 18:43:47 +00:00
assert ( uLength > 16 ) ;
_Analysis_assume_ ( uLength > 16 ) ;
assert ( ISPOWEROF2 ( uLength ) ) ;
assert ( ISPOWEROF2 ( uCount ) ) ;
2021-07-14 21:01:11 +00:00
const XMVECTOR * __restrict pUnityTableReal = pUnityTable ;
const XMVECTOR * __restrict pUnityTableImaginary = pUnityTable + ( uLength > > 2 ) ;
2016-08-22 18:43:47 +00:00
const size_t uTotal = uCount * uLength ;
const size_t uTotal_vectors = uTotal > > 2 ;
const size_t uStage_vectors = uLength > > 2 ;
const size_t uStage_vectors_mask = uStage_vectors - 1 ;
const size_t uStride = uLength > > 4 ; // stride between butterfly elements
const size_t uStrideMask = uStride - 1 ;
const size_t uStride2 = uStride * 2 ;
const size_t uStride3 = uStride * 3 ;
const size_t uStrideInvMask = ~ uStrideMask ;
2021-07-14 21:01:11 +00:00
for ( size_t uIndex = 0 ; uIndex < ( uTotal_vectors > > 2 ) ; + + uIndex )
2016-08-22 18:43:47 +00:00
{
const size_t n = ( ( uIndex & uStrideInvMask ) < < 2 ) + ( uIndex & uStrideMask ) ;
ButterflyDIT4_4 ( pReal [ n ] ,
pReal [ n + uStride ] ,
pReal [ n + uStride2 ] ,
pReal [ n + uStride3 ] ,
pImaginary [ n ] ,
pImaginary [ n + uStride ] ,
pImaginary [ n + uStride2 ] ,
pImaginary [ n + uStride3 ] ,
2021-07-14 21:01:11 +00:00
pUnityTableReal + ( n & uStage_vectors_mask ) ,
2016-08-22 18:43:47 +00:00
pUnityTableImaginary + ( n & uStage_vectors_mask ) ,
uStride , false ) ;
}
2021-07-14 21:01:11 +00:00
if ( uLength > 16 * 4 )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
FFT ( pReal , pImaginary , pUnityTable + ( uLength > > 1 ) , uLength > > 2 , uCount * 4 ) ;
2016-08-22 18:43:47 +00:00
}
2021-07-14 21:01:11 +00:00
else if ( uLength = = 16 * 4 )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
FFT16 ( pReal , pImaginary , uCount * 4 ) ;
2016-08-22 18:43:47 +00:00
}
2021-07-14 21:01:11 +00:00
else if ( uLength = = 8 * 4 )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
FFT8 ( pReal , pImaginary , uCount * 4 ) ;
2016-08-22 18:43:47 +00:00
}
2021-07-14 21:01:11 +00:00
else if ( uLength = = 4 * 4 )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
FFT4 ( pReal , pImaginary , uCount * 4 ) ;
2016-08-22 18:43:47 +00:00
}
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
// Initializes unity roots lookup table used by FFT functions.
// Once initialized, the table need not be initialized again unless a
// different FFT length is desired.
//
// REMARKS:
// The unity tables of FFT length 16 and below are hard coded into the
// respective FFT functions and so need not be initialized.
//
// PARAMETERS:
// pUnityTable - [out] unity table, receives unity roots lookup table, must have at least uLength elements
// uLength - [in] FFT length in frames, must be a power of 2 > 16
//----------------------------------------------------------------------------------
2019-12-12 04:16:59 +00:00
inline void FFTInitializeUnityTable ( _Out_writes_ ( uLength ) XMVECTOR * __restrict pUnityTable , _In_ size_t uLength ) noexcept
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
using namespace DirectX ;
2016-08-22 18:43:47 +00:00
assert ( pUnityTable ) ;
assert ( uLength > 16 ) ;
_Analysis_assume_ ( uLength > 16 ) ;
assert ( ISPOWEROF2 ( uLength ) ) ;
// initialize unity table for recursive FFT lengths: uLength, uLength/4, uLength/16... > 16
2021-07-14 21:01:11 +00:00
// pUnityTable[0 to uLength*4-1] contains real components for current FFT length
// pUnityTable[uLength*4 to uLength*8-1] contains imaginary components for current FFT length
static const XMVECTORF32 vXM0123 = { { { 0.0f , 1.0f , 2.0f , 3.0f } } } ;
uLength > > = 2 ;
XMVECTOR vlStep = XMVectorReplicate ( XM_PIDIV2 / float ( uLength ) ) ;
2016-08-22 18:43:47 +00:00
do
{
uLength > > = 2 ;
2021-07-14 21:01:11 +00:00
XMVECTOR vJP = vXM0123 ;
for ( size_t j = 0 ; j < uLength ; + + j )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
XMVECTOR vSin , vCos ;
XMVECTOR viJP , vlS ;
pUnityTable [ j ] = g_XMOne ;
pUnityTable [ j + uLength * 4 ] = XMVectorZero ( ) ;
vlS = XMVectorMultiply ( vJP , vlStep ) ;
XMVectorSinCos ( & vSin , & vCos , vlS ) ;
pUnityTable [ j + uLength ] = vCos ;
pUnityTable [ j + uLength * 5 ] = XMVectorMultiply ( vSin , g_XMNegativeOne ) ;
viJP = XMVectorAdd ( vJP , vJP ) ;
vlS = XMVectorMultiply ( viJP , vlStep ) ;
XMVectorSinCos ( & vSin , & vCos , vlS ) ;
pUnityTable [ j + uLength * 2 ] = vCos ;
pUnityTable [ j + uLength * 6 ] = XMVectorMultiply ( vSin , g_XMNegativeOne ) ;
viJP = XMVectorAdd ( viJP , vJP ) ;
vlS = XMVectorMultiply ( viJP , vlStep ) ;
XMVectorSinCos ( & vSin , & vCos , vlS ) ;
pUnityTable [ j + uLength * 3 ] = vCos ;
pUnityTable [ j + uLength * 7 ] = XMVectorMultiply ( vSin , g_XMNegativeOne ) ;
vJP = XMVectorAdd ( vJP , g_XMFour ) ;
2016-08-22 18:43:47 +00:00
}
2021-07-14 21:01:11 +00:00
vlStep = XMVectorMultiply ( vlStep , g_XMFour ) ;
pUnityTable + = uLength * 8 ;
2021-09-22 03:25:30 +00:00
} while ( uLength > 4 ) ;
2016-08-22 18:43:47 +00:00
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
// The FFT functions generate output in bit reversed order.
// Use this function to re-arrange them into order of increasing frequency.
//
// REMARKS:
2021-07-14 21:01:11 +00:00
// Exponential values and bits correspond, so the reversed upper index can be omitted depending on the number of exponents.
2016-08-22 18:43:47 +00:00
//
// PARAMETERS:
// pOutput - [out] output buffer, receives samples in order of increasing frequency, cannot overlap pInput, must have at least (1<<uLog2Length)/4 elements
// pInput - [in] input buffer, samples in bit reversed order as generated by FFT functions, cannot overlap pOutput, must have at least (1<<uLog2Length)/4 elements
// uLog2Length - [in] LOG (base 2) of FFT length in samples, must be >= 2
//----------------------------------------------------------------------------------
2019-12-12 04:16:59 +00:00
inline void FFTUnswizzle (
2021-07-14 21:01:11 +00:00
_Out_writes_ ( ( 1 < < uLog2Length ) / 4 ) XMVECTOR * __restrict pOutput ,
_In_reads_ ( ( 1 < < uLog2Length ) / 4 ) const XMVECTOR * __restrict pInput ,
2019-12-12 04:16:59 +00:00
_In_ const size_t uLog2Length ) noexcept
2016-08-22 18:43:47 +00:00
{
assert ( pOutput ) ;
assert ( pInput ) ;
assert ( uLog2Length > = 2 ) ;
_Analysis_assume_ ( uLog2Length > = 2 ) ;
2021-07-14 21:01:11 +00:00
float * __restrict pfOutput = reinterpret_cast < float * > ( pOutput ) ;
const size_t uLength = size_t ( 1 ) < < ( uLog2Length - 2 ) ;
static const unsigned char cSwizzleTable [ 256 ] = {
0x00 , 0x40 , 0x80 , 0xC0 , 0x10 , 0x50 , 0x90 , 0xD0 , 0x20 , 0x60 , 0xA0 , 0xE0 , 0x30 , 0x70 , 0xB0 , 0xF0 ,
0x04 , 0x44 , 0x84 , 0xC4 , 0x14 , 0x54 , 0x94 , 0xD4 , 0x24 , 0x64 , 0xA4 , 0xE4 , 0x34 , 0x74 , 0xB4 , 0xF4 ,
0x08 , 0x48 , 0x88 , 0xC8 , 0x18 , 0x58 , 0x98 , 0xD8 , 0x28 , 0x68 , 0xA8 , 0xE8 , 0x38 , 0x78 , 0xB8 , 0xF8 ,
0x0C , 0x4C , 0x8C , 0xCC , 0x1C , 0x5C , 0x9C , 0xDC , 0x2C , 0x6C , 0xAC , 0xEC , 0x3C , 0x7C , 0xBC , 0xFC ,
0x01 , 0x41 , 0x81 , 0xC1 , 0x11 , 0x51 , 0x91 , 0xD1 , 0x21 , 0x61 , 0xA1 , 0xE1 , 0x31 , 0x71 , 0xB1 , 0xF1 ,
0x05 , 0x45 , 0x85 , 0xC5 , 0x15 , 0x55 , 0x95 , 0xD5 , 0x25 , 0x65 , 0xA5 , 0xE5 , 0x35 , 0x75 , 0xB5 , 0xF5 ,
0x09 , 0x49 , 0x89 , 0xC9 , 0x19 , 0x59 , 0x99 , 0xD9 , 0x29 , 0x69 , 0xA9 , 0xE9 , 0x39 , 0x79 , 0xB9 , 0xF9 ,
0x0D , 0x4D , 0x8D , 0xCD , 0x1D , 0x5D , 0x9D , 0xDD , 0x2D , 0x6D , 0xAD , 0xED , 0x3D , 0x7D , 0xBD , 0xFD ,
0x02 , 0x42 , 0x82 , 0xC2 , 0x12 , 0x52 , 0x92 , 0xD2 , 0x22 , 0x62 , 0xA2 , 0xE2 , 0x32 , 0x72 , 0xB2 , 0xF2 ,
0x06 , 0x46 , 0x86 , 0xC6 , 0x16 , 0x56 , 0x96 , 0xD6 , 0x26 , 0x66 , 0xA6 , 0xE6 , 0x36 , 0x76 , 0xB6 , 0xF6 ,
0x0A , 0x4A , 0x8A , 0xCA , 0x1A , 0x5A , 0x9A , 0xDA , 0x2A , 0x6A , 0xAA , 0xEA , 0x3A , 0x7A , 0xBA , 0xFA ,
0x0E , 0x4E , 0x8E , 0xCE , 0x1E , 0x5E , 0x9E , 0xDE , 0x2E , 0x6E , 0xAE , 0xEE , 0x3E , 0x7E , 0xBE , 0xFE ,
0x03 , 0x43 , 0x83 , 0xC3 , 0x13 , 0x53 , 0x93 , 0xD3 , 0x23 , 0x63 , 0xA3 , 0xE3 , 0x33 , 0x73 , 0xB3 , 0xF3 ,
0x07 , 0x47 , 0x87 , 0xC7 , 0x17 , 0x57 , 0x97 , 0xD7 , 0x27 , 0x67 , 0xA7 , 0xE7 , 0x37 , 0x77 , 0xB7 , 0xF7 ,
0x0B , 0x4B , 0x8B , 0xCB , 0x1B , 0x5B , 0x9B , 0xDB , 0x2B , 0x6B , 0xAB , 0xEB , 0x3B , 0x7B , 0xBB , 0xFB ,
0x0F , 0x4F , 0x8F , 0xCF , 0x1F , 0x5F , 0x9F , 0xDF , 0x2F , 0x6F , 0xAF , 0xEF , 0x3F , 0x7F , 0xBF , 0xFF
} ;
if ( ( uLog2Length & 1 ) = = 0 )
2016-08-22 18:43:47 +00:00
{
// even powers of two
2021-07-14 21:01:11 +00:00
const size_t uRev32 = 32 - uLog2Length ;
for ( size_t uIndex = 0 ; uIndex < uLength ; + + uIndex )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
XMFLOAT4A f4a ;
XMStoreFloat4A ( & f4a , pInput [ uIndex ] ) ;
const size_t n = uIndex * 4 ;
const size_t uAddr = ( static_cast < size_t > ( cSwizzleTable [ n & 0xff ] ) < < 24 ) |
( static_cast < size_t > ( cSwizzleTable [ ( n > > 8 ) & 0xff ] ) < < 16 ) |
( static_cast < size_t > ( cSwizzleTable [ ( n > > 16 ) & 0xff ] ) < < 8 ) |
( static_cast < size_t > ( cSwizzleTable [ ( n > > 24 ) ] ) ) ;
pfOutput [ uAddr > > uRev32 ] = f4a . x ;
pfOutput [ ( 0x40000000 | uAddr ) > > uRev32 ] = f4a . y ;
pfOutput [ ( 0x80000000 | uAddr ) > > uRev32 ] = f4a . z ;
pfOutput [ ( 0xC0000000 | uAddr ) > > uRev32 ] = f4a . w ;
2016-08-22 18:43:47 +00:00
}
}
else
{
// odd powers of two
2021-07-14 21:01:11 +00:00
const size_t uRev7 = size_t ( 1 ) < < ( uLog2Length - 3 ) ;
const size_t uRev32 = 32 - ( uLog2Length - 3 ) ;
for ( size_t uIndex = 0 ; uIndex < uLength ; + + uIndex )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
XMFLOAT4A f4a ;
XMStoreFloat4A ( & f4a , pInput [ uIndex ] ) ;
const size_t n = ( uIndex > > 1 ) ;
size_t uAddr = ( ( ( static_cast < size_t > ( cSwizzleTable [ n & 0xff ] ) < < 24 ) |
( static_cast < size_t > ( cSwizzleTable [ ( n > > 8 ) & 0xff ] ) < < 16 ) |
( static_cast < size_t > ( cSwizzleTable [ ( n > > 16 ) & 0xff ] ) < < 8 ) |
( static_cast < size_t > ( cSwizzleTable [ ( n > > 24 ) ] ) ) ) > > uRev32 ) |
( ( uIndex & 1 ) * uRev7 * 4 ) ;
pfOutput [ uAddr ] = f4a . x ;
uAddr + = uRev7 ;
pfOutput [ uAddr ] = f4a . y ;
uAddr + = uRev7 ;
pfOutput [ uAddr ] = f4a . z ;
uAddr + = uRev7 ;
pfOutput [ uAddr ] = f4a . w ;
2016-08-22 18:43:47 +00:00
}
}
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
// Convert complex components to polar form.
//
// PARAMETERS:
// pOutput - [out] output buffer, receives samples in polar form, must have at least uLength/4 elements
// pInputReal - [in] input buffer (real components), must have at least uLength/4 elements
// pInputImaginary - [in] input buffer (imaginary components), must have at least uLength/4 elements
// uLength - [in] FFT length in samples, must be a power of 2 >= 4
//----------------------------------------------------------------------------------
2021-11-29 21:08:15 +00:00
# ifdef _MSC_VER
2016-08-22 18:43:47 +00:00
# pragma warning(suppress: 6101)
2021-11-29 21:08:15 +00:00
# endif
2019-12-12 04:16:59 +00:00
inline void FFTPolar (
2021-07-14 21:01:11 +00:00
_Out_writes_ ( uLength / 4 ) XMVECTOR * __restrict pOutput ,
_In_reads_ ( uLength / 4 ) const XMVECTOR * __restrict pInputReal ,
_In_reads_ ( uLength / 4 ) const XMVECTOR * __restrict pInputImaginary ,
2019-12-12 04:16:59 +00:00
_In_ const size_t uLength ) noexcept
2016-08-22 18:43:47 +00:00
{
using namespace DirectX ;
assert ( pOutput ) ;
assert ( pInputReal ) ;
assert ( pInputImaginary ) ;
assert ( uLength > = 4 ) ;
_Analysis_assume_ ( uLength > = 4 ) ;
assert ( ISPOWEROF2 ( uLength ) ) ;
2021-07-14 21:01:11 +00:00
const float flOneOverLength = 1.0f / float ( uLength ) ;
2016-08-22 18:43:47 +00:00
// result = sqrtf((real/uLength)^2 + (imaginary/uLength)^2) * 2
2021-07-14 21:01:11 +00:00
const XMVECTOR vOneOverLength = XMVectorReplicate ( flOneOverLength ) ;
2016-08-22 18:43:47 +00:00
2021-07-14 21:01:11 +00:00
for ( size_t uIndex = 0 ; uIndex < ( uLength > > 2 ) ; + + uIndex )
2016-08-22 18:43:47 +00:00
{
XMVECTOR vReal = XMVectorMultiply ( pInputReal [ uIndex ] , vOneOverLength ) ;
XMVECTOR vImaginary = XMVectorMultiply ( pInputImaginary [ uIndex ] , vOneOverLength ) ;
XMVECTOR vRR = XMVectorMultiply ( vReal , vReal ) ;
XMVECTOR vII = XMVectorMultiply ( vImaginary , vImaginary ) ;
XMVECTOR vRRplusII = XMVectorAdd ( vRR , vII ) ;
XMVECTOR vTotal = XMVectorSqrt ( vRRplusII ) ;
pOutput [ uIndex ] = XMVectorAdd ( vTotal , vTotal ) ;
}
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
// Deinterleaves audio samples
//
// REMARKS:
// For example, audio of the form [LRLRLR] becomes [LLLRRR].
//
// PARAMETERS:
// pOutput - [out] output buffer, receives samples in deinterleaved form, cannot overlap pInput, must have at least (uChannelCount*uFrameCount)/4 elements
// pInput - [in] input buffer, cannot overlap pOutput, must have at least (uChannelCount*uFrameCount)/4 elements
// uChannelCount - [in] number of channels, must be > 1
// uFrameCount - [in] number of frames of valid data, must be > 0
//----------------------------------------------------------------------------------
2019-12-12 04:16:59 +00:00
inline void Deinterleave (
2021-07-14 21:01:11 +00:00
_Out_writes_ ( ( uChannelCount * uFrameCount ) / 4 ) XMVECTOR * __restrict pOutput ,
_In_reads_ ( ( uChannelCount * uFrameCount ) / 4 ) const XMVECTOR * __restrict pInput ,
2019-12-12 04:16:59 +00:00
_In_ const size_t uChannelCount ,
_In_ const size_t uFrameCount ) noexcept
2016-08-22 18:43:47 +00:00
{
assert ( pOutput ) ;
assert ( pInput ) ;
assert ( uChannelCount > 1 ) ;
assert ( uFrameCount > 0 ) ;
2021-07-14 21:01:11 +00:00
float * __restrict pfOutput = reinterpret_cast < float * __restrict > ( pOutput ) ;
2016-08-22 18:43:47 +00:00
const float * __restrict pfInput = reinterpret_cast < const float * __restrict > ( pInput ) ;
2021-07-14 21:01:11 +00:00
for ( size_t uChannel = 0 ; uChannel < uChannelCount ; + + uChannel )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
for ( size_t uFrame = 0 ; uFrame < uFrameCount ; + + uFrame )
2016-08-22 18:43:47 +00:00
{
pfOutput [ uChannel * uFrameCount + uFrame ] = pfInput [ uFrame * uChannelCount + uChannel ] ;
}
}
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
// Interleaves audio samples
//
// REMARKS:
// For example, audio of the form [LLLRRR] becomes [LRLRLR].
//
// PARAMETERS:
// pOutput - [out] output buffer, receives samples in interleaved form, cannot overlap pInput, must have at least (uChannelCount*uFrameCount)/4 elements
// pInput - [in] input buffer, cannot overlap pOutput, must have at least (uChannelCount*uFrameCount)/4 elements
// uChannelCount - [in] number of channels, must be > 1
// uFrameCount - [in] number of frames of valid data, must be > 0
//----------------------------------------------------------------------------------
2019-12-12 04:16:59 +00:00
inline void Interleave (
2021-07-14 21:01:11 +00:00
_Out_writes_ ( ( uChannelCount * uFrameCount ) / 4 ) XMVECTOR * __restrict pOutput ,
_In_reads_ ( ( uChannelCount * uFrameCount ) / 4 ) const XMVECTOR * __restrict pInput ,
2019-12-12 04:16:59 +00:00
_In_ const size_t uChannelCount ,
_In_ const size_t uFrameCount ) noexcept
2016-08-22 18:43:47 +00:00
{
assert ( pOutput ) ;
assert ( pInput ) ;
assert ( uChannelCount > 1 ) ;
assert ( uFrameCount > 0 ) ;
2021-07-14 21:01:11 +00:00
float * __restrict pfOutput = reinterpret_cast < float * __restrict > ( pOutput ) ;
2016-08-22 18:43:47 +00:00
const float * __restrict pfInput = reinterpret_cast < const float * __restrict > ( pInput ) ;
2021-07-14 21:01:11 +00:00
for ( size_t uChannel = 0 ; uChannel < uChannelCount ; + + uChannel )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
for ( size_t uFrame = 0 ; uFrame < uFrameCount ; + + uFrame )
2016-08-22 18:43:47 +00:00
{
pfOutput [ uFrame * uChannelCount + uChannel ] = pfInput [ uChannel * uFrameCount + uFrame ] ;
}
}
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
// This function applies a 2^N-sample FFT and unswizzles the result such
// that the samples are in order of increasing frequency.
// Audio is first deinterleaved if multichannel.
//
// PARAMETERS:
// pReal - [inout] real components, must have at least (1<<uLog2Length*uChannelCount)/4 elements
// pImaginary - [out] imaginary components, must have at least (1<<uLog2Length*uChannelCount)/4 elements
// pUnityTable - [in] unity table, must have at least (1<<uLog2Length) elements, see FFTInitializeUnityTable()
// uChannelCount - [in] number of channels, must be within [1, 6]
// uLog2Length - [in] LOG (base 2) of FFT length in frames, must within [2, 9]
//----------------------------------------------------------------------------------
2019-12-12 04:16:59 +00:00
inline void FFTInterleaved (
2021-07-14 21:01:11 +00:00
_Inout_updates_ ( ( ( 1 < < uLog2Length ) * uChannelCount ) / 4 ) XMVECTOR * __restrict pReal ,
_Out_writes_ ( ( ( 1 < < uLog2Length ) * uChannelCount ) / 4 ) XMVECTOR * __restrict pImaginary ,
_In_reads_ ( 1 < < uLog2Length ) const XMVECTOR * __restrict pUnityTable ,
2019-12-12 04:16:59 +00:00
_In_ const size_t uChannelCount ,
_In_ const size_t uLog2Length ) noexcept
2016-08-22 18:43:47 +00:00
{
assert ( pReal ) ;
assert ( pImaginary ) ;
assert ( pUnityTable ) ;
2019-07-31 19:26:12 +00:00
assert ( reinterpret_cast < uintptr_t > ( pReal ) % 16 = = 0 ) ;
assert ( reinterpret_cast < uintptr_t > ( pImaginary ) % 16 = = 0 ) ;
assert ( reinterpret_cast < uintptr_t > ( pUnityTable ) % 16 = = 0 ) ;
2016-08-22 18:43:47 +00:00
assert ( uChannelCount > 0 & & uChannelCount < = 6 ) ;
assert ( uLog2Length > = 2 & & uLog2Length < = 9 ) ;
XMVECTOR vRealTemp [ 768 ] ;
XMVECTOR vImaginaryTemp [ 768 ] ;
const size_t uLength = size_t ( 1 ) < < uLog2Length ;
if ( uChannelCount > 1 )
{
Deinterleave ( vRealTemp , pReal , uChannelCount , uLength ) ;
}
else
{
2021-07-14 21:01:11 +00:00
memcpy_s ( vRealTemp , sizeof ( vRealTemp ) , pReal , ( uLength > > 2 ) * sizeof ( XMVECTOR ) ) ;
2016-08-22 18:43:47 +00:00
}
2021-07-14 21:01:11 +00:00
memset ( vImaginaryTemp , 0 , ( uChannelCount * ( uLength > > 2 ) ) * sizeof ( XMVECTOR ) ) ;
2016-08-22 18:43:47 +00:00
if ( uLength > 16 )
{
2021-07-14 21:01:11 +00:00
for ( size_t uChannel = 0 ; uChannel < uChannelCount ; + + uChannel )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
FFT ( & vRealTemp [ uChannel * ( uLength > > 2 ) ] , & vImaginaryTemp [ uChannel * ( uLength > > 2 ) ] , pUnityTable , uLength ) ;
2016-08-22 18:43:47 +00:00
}
}
else if ( uLength = = 16 )
{
2021-07-14 21:01:11 +00:00
for ( size_t uChannel = 0 ; uChannel < uChannelCount ; + + uChannel )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
FFT16 ( & vRealTemp [ uChannel * ( uLength > > 2 ) ] , & vImaginaryTemp [ uChannel * ( uLength > > 2 ) ] ) ;
2016-08-22 18:43:47 +00:00
}
}
else if ( uLength = = 8 )
{
2021-07-14 21:01:11 +00:00
for ( size_t uChannel = 0 ; uChannel < uChannelCount ; + + uChannel )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
FFT8 ( & vRealTemp [ uChannel * ( uLength > > 2 ) ] , & vImaginaryTemp [ uChannel * ( uLength > > 2 ) ] ) ;
2016-08-22 18:43:47 +00:00
}
}
else if ( uLength = = 4 )
{
2021-07-14 21:01:11 +00:00
for ( size_t uChannel = 0 ; uChannel < uChannelCount ; + + uChannel )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
FFT4 ( & vRealTemp [ uChannel * ( uLength > > 2 ) ] , & vImaginaryTemp [ uChannel * ( uLength > > 2 ) ] ) ;
2016-08-22 18:43:47 +00:00
}
}
2021-07-14 21:01:11 +00:00
for ( size_t uChannel = 0 ; uChannel < uChannelCount ; + + uChannel )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
FFTUnswizzle ( & pReal [ uChannel * ( uLength > > 2 ) ] , & vRealTemp [ uChannel * ( uLength > > 2 ) ] , uLog2Length ) ;
FFTUnswizzle ( & pImaginary [ uChannel * ( uLength > > 2 ) ] , & vImaginaryTemp [ uChannel * ( uLength > > 2 ) ] , uLog2Length ) ;
2016-08-22 18:43:47 +00:00
}
}
//----------------------------------------------------------------------------------
// DESCRIPTION:
// This function applies a 2^N-sample inverse FFT.
// Audio is interleaved if multichannel.
//
// PARAMETERS:
// pReal - [inout] real components, must have at least (1<<uLog2Length*uChannelCount)/4 elements
// pImaginary - [in] imaginary components, must have at least (1<<uLog2Length*uChannelCount)/4 elements
// pUnityTable - [in] unity table, must have at least (1<<uLog2Length) elements, see FFTInitializeUnityTable()
// uChannelCount - [in] number of channels, must be > 0
// uLog2Length - [in] LOG (base 2) of FFT length in frames, must within [2, 9]
//----------------------------------------------------------------------------------
2019-12-12 04:16:59 +00:00
inline void IFFTDeinterleaved (
2021-07-14 21:01:11 +00:00
_Inout_updates_ ( ( ( 1 < < uLog2Length ) * uChannelCount ) / 4 ) XMVECTOR * __restrict pReal ,
_In_reads_ ( ( ( 1 < < uLog2Length ) * uChannelCount ) / 4 ) const XMVECTOR * __restrict pImaginary ,
_In_reads_ ( 1 < < uLog2Length ) const XMVECTOR * __restrict pUnityTable ,
2019-12-12 04:16:59 +00:00
_In_ const size_t uChannelCount ,
_In_ const size_t uLog2Length ) noexcept
2016-08-22 18:43:47 +00:00
{
using namespace DirectX ;
assert ( pReal ) ;
assert ( pImaginary ) ;
assert ( pUnityTable ) ;
2019-07-31 19:26:12 +00:00
assert ( reinterpret_cast < uintptr_t > ( pReal ) % 16 = = 0 ) ;
assert ( reinterpret_cast < uintptr_t > ( pImaginary ) % 16 = = 0 ) ;
assert ( reinterpret_cast < uintptr_t > ( pUnityTable ) % 16 = = 0 ) ;
2016-08-22 18:43:47 +00:00
assert ( uChannelCount > 0 & & uChannelCount < = 6 ) ;
_Analysis_assume_ ( uChannelCount > 0 & & uChannelCount < = 6 ) ;
assert ( uLog2Length > = 2 & & uLog2Length < = 9 ) ;
_Analysis_assume_ ( uLog2Length > = 2 & & uLog2Length < = 9 ) ;
2018-01-18 23:27:02 +00:00
XMVECTOR vRealTemp [ 768 ] = { } ;
XMVECTOR vImaginaryTemp [ 768 ] = { } ;
2016-08-22 18:43:47 +00:00
const size_t uLength = size_t ( 1 ) < < uLog2Length ;
2020-05-12 22:25:56 +00:00
const XMVECTOR vRnp = XMVectorReplicate ( 1.0f / float ( uLength ) ) ;
const XMVECTOR vRnm = XMVectorReplicate ( - 1.0f / float ( uLength ) ) ;
2021-07-14 21:01:11 +00:00
for ( size_t u = 0 ; u < uChannelCount * ( uLength > > 2 ) ; u + + )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
vRealTemp [ u ] = XMVectorMultiply ( pReal [ u ] , vRnp ) ;
2016-08-22 18:43:47 +00:00
vImaginaryTemp [ u ] = XMVectorMultiply ( pImaginary [ u ] , vRnm ) ;
}
if ( uLength > 16 )
{
2021-07-14 21:01:11 +00:00
for ( size_t uChannel = 0 ; uChannel < uChannelCount ; + + uChannel )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
FFT ( & vRealTemp [ uChannel * ( uLength > > 2 ) ] , & vImaginaryTemp [ uChannel * ( uLength > > 2 ) ] , pUnityTable , uLength ) ;
2016-08-22 18:43:47 +00:00
}
}
else if ( uLength = = 16 )
{
2021-07-14 21:01:11 +00:00
for ( size_t uChannel = 0 ; uChannel < uChannelCount ; + + uChannel )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
FFT16 ( & vRealTemp [ uChannel * ( uLength > > 2 ) ] , & vImaginaryTemp [ uChannel * ( uLength > > 2 ) ] ) ;
2016-08-22 18:43:47 +00:00
}
}
else if ( uLength = = 8 )
{
2021-07-14 21:01:11 +00:00
for ( size_t uChannel = 0 ; uChannel < uChannelCount ; + + uChannel )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
FFT8 ( & vRealTemp [ uChannel * ( uLength > > 2 ) ] , & vImaginaryTemp [ uChannel * ( uLength > > 2 ) ] ) ;
2016-08-22 18:43:47 +00:00
}
}
else if ( uLength = = 4 )
{
2021-07-14 21:01:11 +00:00
for ( size_t uChannel = 0 ; uChannel < uChannelCount ; + + uChannel )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
FFT4 ( & vRealTemp [ uChannel * ( uLength > > 2 ) ] , & vImaginaryTemp [ uChannel * ( uLength > > 2 ) ] ) ;
2016-08-22 18:43:47 +00:00
}
}
2021-07-14 21:01:11 +00:00
for ( size_t uChannel = 0 ; uChannel < uChannelCount ; + + uChannel )
2016-08-22 18:43:47 +00:00
{
2021-07-14 21:01:11 +00:00
FFTUnswizzle ( & vImaginaryTemp [ uChannel * ( uLength > > 2 ) ] , & vRealTemp [ uChannel * ( uLength > > 2 ) ] , uLog2Length ) ;
2016-08-22 18:43:47 +00:00
}
if ( uChannelCount > 1 )
{
Interleave ( pReal , vImaginaryTemp , uChannelCount , uLength ) ;
}
else
{
2021-07-14 21:01:11 +00:00
memcpy_s ( pReal , uLength * uChannelCount * sizeof ( float ) , vImaginaryTemp , ( uLength > > 2 ) * sizeof ( XMVECTOR ) ) ;
2016-08-22 18:43:47 +00:00
}
}
2018-01-16 22:29:55 +00:00
} // namespace XDSP
2016-08-22 18:43:47 +00:00
2023-05-05 20:22:46 +00:00
# ifdef __clang__
# pragma clang diagnostic pop
# endif
2021-11-29 21:08:15 +00:00
# ifdef _MSC_VER
2016-08-22 18:43:47 +00:00
# pragma warning(pop)
2021-11-29 21:08:15 +00:00
# endif