47 #ifndef NE10_FFT_GENERIC_INT32_H 48 #define NE10_FFT_GENERIC_INT32_H 54 #define NE10_CPX_MUL_S32(Z,A,B) \ 56 ne10_int32_t ARBR = ((NE10_F2I32_SAMPPROD) A.r * B.r) >> 31; \ 57 ne10_int32_t ARBI = ((NE10_F2I32_SAMPPROD) A.r * B.i) >> 31; \ 58 ne10_int32_t AIBR = ((NE10_F2I32_SAMPPROD) A.i * B.r) >> 31; \ 59 ne10_int32_t AIBI = ((NE10_F2I32_SAMPPROD) A.i * B.i) >> 31; \ 64 #define NE10_S_MUL_S32(A,S) (((NE10_F2I32_SAMPPROD) (A) * (S)) >> 31) 121 NE10_CPX_ADD (scratch_out[0], scratch_in[0], scratch_in[1]);
122 NE10_CPX_SUB (scratch_out[1], scratch_in[0], scratch_in[1]);
137 scratch_in[0] = Fin[0];
138 scratch_in[1] = Fin[1];
139 scratch_in[2] = Fin[2];
141 scratch[1] = scratch_in[1];
142 scratch[2] = scratch_in[2];
147 scratch_in[1].
r = scratch_in[0].
r - (scratch[3].
r >> 1);
148 scratch_in[1].
i = scratch_in[0].
i - (scratch[3].
i >> 1);
153 scratch_in[0].
r += scratch[3].
r;
154 scratch_in[0].
i += scratch[3].
i;
156 scratch_in[2].
r = scratch_in[1].
r + scratch[0].
i;
157 scratch_in[2].
i = scratch_in[1].
i - scratch[0].
r;
159 scratch_in[1].
r -= scratch[0].
i;
160 scratch_in[1].
i += scratch[0].
r;
162 Fout[0] = scratch_in[0];
163 Fout[1] = scratch_in[1];
164 Fout[2] = scratch_in[2];
178 NE10_CPX_ADD (scratch[0], scratch_in[0], scratch_in[2]);
179 NE10_CPX_SUB (scratch[1], scratch_in[0], scratch_in[2]);
180 NE10_CPX_ADD (scratch[2], scratch_in[1], scratch_in[3]);
181 NE10_CPX_SUB (scratch[3], scratch_in[1], scratch_in[3]);
186 scratch_out[1].r = scratch[1].
r + scratch[3].
i;
187 scratch_out[1].i = scratch[1].
i - scratch[3].
r;
188 scratch_out[3].r = scratch[1].
r - scratch[3].
i;
189 scratch_out[3].i = scratch[1].
i + scratch[3].
r;
203 scratch_in[0] = Fin[0];
204 scratch_in[1] = Fin[1];
205 scratch_in[2] = Fin[2];
206 scratch_in[3] = Fin[3];
207 scratch_in[4] = Fin[4];
209 scratch[0] = scratch_in[0];
210 scratch[1] = scratch_in[1];
211 scratch[2] = scratch_in[2];
212 scratch[3] = scratch_in[3];
213 scratch[4] = scratch_in[4];
220 scratch_in[0].
r += scratch[7].
r + scratch[8].
r;
221 scratch_in[0].
i += scratch[7].
i + scratch[8].
i;
223 scratch[5].
r = scratch[0].
r 226 scratch[5].
i = scratch[0].
i 238 scratch[11].
r = scratch[0].
r 241 scratch[11].
i = scratch[0].
i 253 Fout[0] = scratch_in[0];
254 Fout[1] = scratch_in[1];
255 Fout[2] = scratch_in[2];
256 Fout[3] = scratch_in[3];
257 Fout[4] = scratch_in[4];
269 scalar.i = -scalar.i;
277 template<
int RADIX,
class T>
281 NE10_CONJ_S<T> (in[RADIX - 1]);
309 template<
int RADIX,
class T>
320 out[0] = NE10_CPX_LOAD_S<ne10_fft_cpx_int32_t> (Fin);
323 template<
int RADIX,
class T>
328 out[0] = NE10_CPX_LOAD_S<T> (Fin);
339 template<
int RADIX,
class T>
344 NE10_CPX_STORE_S<T> (Fout, in[0]);
378 #endif // NE10_FFT_GENERIC_INT32_H void FFT_FCU(ne10_fft_cpx_int32_t scratch_out[RADIX], const ne10_fft_cpx_int32_t scratch_in[RADIX])
Basic fixed-point butterfly used in each stage.
void NE10_SCALED< 1 >(ne10_fft_cpx_int32_t out[1], const ne10_int32_t scaling)
void NE10_STORE_BY_STEP< 1, ne10_fft_cpx_int32_t >(ne10_fft_cpx_int32_t *Fout, const ne10_fft_cpx_int32_t in[1], const ne10_int32_t)
T NE10_CPX_LOAD_S(const T *ptr)
#define NE10_S_MUL_S32(A, S)
void NE10_LOAD_BY_STEP< 1, ne10_fft_cpx_int32_t >(ne10_fft_cpx_int32_t out[0], const ne10_fft_cpx_int32_t *Fin, const ne10_int32_t)
void NE10_SCALED(ne10_fft_cpx_int32_t out[RADIX], const ne10_int32_t scaling)
Scale a fixed-size array by the given divisor.
#define NE10_F2I32_FIXDIV(c, div)
void FFT_FCU< 2 >(ne10_fft_cpx_int32_t scratch_out[2], const ne10_fft_cpx_int32_t scratch_in[2])
Basic fixed-point Radix-2 butterfly used in each stage.
void NE10_CONJ< 1, ne10_fft_cpx_int32_t >(ne10_fft_cpx_int32_t in[1])
void FFT_MUL_TW< 2 >(ne10_fft_cpx_int32_t out[2], const ne10_fft_cpx_int32_t in[2], const ne10_fft_cpx_int32_t tw[1])
void NE10_STORE_BY_STEP(T *Fout, const T in[RADIX], const ne10_int32_t out_step)
Store a fixed-size array to the given buffer, using the given step.
Structure for the 32-bit fixed-point FFT function.
void NE10_LOAD_BY_STEP(T out[RADIX], const T *Fin, const ne10_int32_t in_step)
Load a fixed-size array from the given buffer, using the given step.
void NE10_CONJ_S(T &)
Conjugate a fixed-point complex scalar/NEON vector.
#define NE10_CPX_MUL_S32(Z, A, B)
void NE10_CONJ(T in[RADIX])
Conjugate a fixed-point complex array.
void FFT_FCU< 5 >(ne10_fft_cpx_int32_t Fout[5], const ne10_fft_cpx_int32_t Fin[5])
Basic fixed-point radix-5 butterfly used in each stage.
void NE10_CONJ_S< ne10_fft_cpx_int32_t >(ne10_fft_cpx_int32_t &scalar)
void FFT_FCU< 3 >(ne10_fft_cpx_int32_t Fout[3], const ne10_fft_cpx_int32_t Fin[3])
Basic fixed-point radix-3 butterfly used in each stage.
#define NE10_CPX_ADD(Z, A, B)
void FFT_FCU< 4 >(ne10_fft_cpx_int32_t scratch_out[4], const ne10_fft_cpx_int32_t scratch_in[4])
Basic fixed-point radix-4 butterfly used in each stage.
void NE10_CPX_STORE_S(T *Fout, const T in)
void FFT_MUL_TW(ne10_fft_cpx_int32_t out[RADIX], const ne10_fft_cpx_int32_t in[RADIX], const ne10_fft_cpx_int32_t tw[RADIX])
Multiply input with twiddles.
#define NE10_CPX_SUB(Z, A, B)