// Excerpt of a radix-8 forward (real-to-complex) butterfly loop. The function
// header, local declarations and braces are not part of this excerpt;
// presumably this is the body of ne10_radix8_r2c_c -- TODO confirm.
// The leading integers appear to be line numbers carried over from a listing.
// The loop body is executed fstride times.
52 #if (NE10_UNROLL_LEVEL > 0) 71 for (f_count = fstride; f_count; f_count --)
// Stage 1: for k = 0..3, form the sum and the difference of the two inputs
// located at in_step * k and in_step * (k + 4).
73 scratch_in[0] = Fin_r[in_step * 0] + Fin_r[in_step * (0 + 4)];
74 scratch_in[1] = Fin_r[in_step * 0] - Fin_r[in_step * (0 + 4)];
75 scratch_in[2] = Fin_r[in_step * 1] + Fin_r[in_step * (1 + 4)];
76 scratch_in[3] = Fin_r[in_step * 1] - Fin_r[in_step * (1 + 4)];
77 scratch_in[4] = Fin_r[in_step * 2] + Fin_r[in_step * (2 + 4)];
78 scratch_in[5] = Fin_r[in_step * 2] - Fin_r[in_step * (2 + 4)];
79 scratch_in[6] = Fin_r[in_step * 3] + Fin_r[in_step * (3 + 4)];
80 scratch_in[7] = Fin_r[in_step * 3] - Fin_r[in_step * (3 + 4)];
// Scale the two odd-index difference terms by the TW_81_F32 / TW_81N_F32
// constants (defined outside this excerpt).
82 scratch_in[3] *= TW_81_F32;
83 scratch_in[7] *= TW_81N_F32;
// Stage 2: combine the stage-1 terms into four partial results.
86 scratch[0] = scratch_in[0] + scratch_in[4];
87 scratch[1] = scratch_in[2] + scratch_in[6];
88 scratch[2] = scratch_in[7] - scratch_in[3];
89 scratch[3] = scratch_in[3] + scratch_in[7];
// Final stage: write the eight results to consecutive slots Fout_r[0..7].
91 Fout_r[0] = scratch [0] + scratch [1];
92 Fout_r[7] = scratch [0] - scratch [1];
94 Fout_r[1] = scratch_in[1] + scratch [3];
95 Fout_r[5] = scratch_in[1] - scratch [3];
97 Fout_r[2] = scratch [2] - scratch_in[5];
98 Fout_r[6] = scratch [2] + scratch_in[5];
100 Fout_r[3] = scratch_in[0] - scratch_in[4];
102 Fout_r[4] = scratch_in[6] - scratch_in[2];
// Excerpt of a radix-8 inverse (complex-to-real) butterfly loop, executed
// fstride times. The function header and braces are not visible here;
// presumably the body of ne10_radix8_c2r_c -- TODO confirm.
126 for (f_count = fstride; f_count; f_count --)
// Build eight intermediate terms from the consecutive inputs Fin_r[0..7].
// Several inputs are deliberately added or subtracted twice.
128 scratch_in[0] = Fin_r[0] + Fin_r[3] + Fin_r[3] + Fin_r[7];
129 scratch_in[1] = Fin_r[1] + Fin_r[1] + Fin_r[5] + Fin_r[5];
130 scratch_in[2] = Fin_r[0] - Fin_r[4] - Fin_r[4] - Fin_r[7];
131 scratch_in[3] = Fin_r[1] - Fin_r[2] - Fin_r[5] - Fin_r[6];
132 scratch_in[4] = Fin_r[0] - Fin_r[3] - Fin_r[3] + Fin_r[7];
133 scratch_in[5] = - Fin_r[2] - Fin_r[2] + Fin_r[6] + Fin_r[6];
134 scratch_in[6] = Fin_r[0] + Fin_r[4] + Fin_r[4] - Fin_r[7];
135 scratch_in[7] = Fin_r[1] + Fin_r[2] - Fin_r[5] + Fin_r[6];
// Note: these two terms are divided (not multiplied) by the TW_81 constants.
137 scratch_in[3] /= TW_81_F32;
138 scratch_in[7] /= TW_81N_F32;
// Pairwise sum/difference of the intermediate terms; the outputs are written
// with a stride of in_step.
140 Fout_r[0 * in_step] = scratch_in[0] + scratch_in[1];
141 Fout_r[4 * in_step] = scratch_in[0] - scratch_in[1];
142 Fout_r[1 * in_step] = scratch_in[2] + scratch_in[3];
143 Fout_r[5 * in_step] = scratch_in[2] - scratch_in[3];
144 Fout_r[2 * in_step] = scratch_in[4] + scratch_in[5];
145 Fout_r[6 * in_step] = scratch_in[4] - scratch_in[5];
146 Fout_r[3 * in_step] = scratch_in[6] + scratch_in[7];
147 Fout_r[7 * in_step] = scratch_in[6] - scratch_in[7];
// Only when NE10_DSP_RFFT_SCALING is defined: multiply every output by
// one_by_N (defined outside this excerpt). The matching #endif is not visible.
149 #if defined(NE10_DSP_RFFT_SCALING) 150 Fout_r[0 * in_step] *= one_by_N;
151 Fout_r[4 * in_step] *= one_by_N;
152 Fout_r[1 * in_step] *= one_by_N;
153 Fout_r[5 * in_step] *= one_by_N;
154 Fout_r[2 * in_step] *= one_by_N;
155 Fout_r[6 * in_step] *= one_by_N;
156 Fout_r[3 * in_step] *= one_by_N;
157 Fout_r[7 * in_step] *= one_by_N;
// Excerpt of a radix-4 forward loop, executed fstride times (presumably
// ne10_radix4_r2c_c -- TODO confirm; header and braces not visible).
182 for (f_count = fstride; f_count; f_count --)
// Gather four inputs spaced in_step apart.
184 scratch_in[0] = Fin_r[0 * in_step];
185 scratch_in[1] = Fin_r[1 * in_step];
186 scratch_in[2] = Fin_r[2 * in_step];
187 scratch_in[3] = Fin_r[3 * in_step];
// The step that derives scratch_out from scratch_in is not part of this
// excerpt. The four results are stored to consecutive output slots.
195 Fout_r[0] = scratch_out[0];
196 Fout_r[1] = scratch_out[1];
197 Fout_r[2] = scratch_out[2];
198 Fout_r[3] = scratch_out[3];
// Excerpt of a radix-4 inverse loop, executed fstride times (presumably
// ne10_radix4_c2r_c -- TODO confirm; header and braces not visible).
222 for (f_count = fstride; f_count; f_count --)
// Load four consecutive inputs.
224 scratch_in[0] = Fin_r[0];
225 scratch_in[1] = Fin_r[1];
226 scratch_in[2] = Fin_r[2];
227 scratch_in[3] = Fin_r[3];
// The step that derives scratch_out from scratch_in is not part of this
// excerpt. When NE10_DSP_RFFT_SCALING is defined the results are multiplied by
// one_by_N before being stored (the matching #endif is not visible).
235 #if defined(NE10_DSP_RFFT_SCALING) 236 scratch_out[0] *= one_by_N;
237 scratch_out[1] *= one_by_N;
238 scratch_out[2] *= one_by_N;
239 scratch_out[3] *= one_by_N;
// Scatter the four results with a stride of in_step.
243 Fout_r[0 * in_step] = scratch_out[0];
244 Fout_r[1 * in_step] = scratch_out[1];
245 Fout_r[2 * in_step] = scratch_out[2];
246 Fout_r[3 * in_step] = scratch_out[3];
// Two occurrences of the same 2-point sum/difference. The listing numbers
// (260-261 and 270-271) suggest they sit in two separate functions, presumably
// the forward and inverse radix-2 kernels -- TODO confirm against the full file.
260 Fout_r[0] = Fin_r[0] + Fin_r[1];
261 Fout_r[1] = Fin_r[0] - Fin_r[1];
270 Fout_r[0] = Fin_r[0] + Fin_r[1];
271 Fout_r[1] = Fin_r[0] - Fin_r[1];
// The "#if defined(NE10_DSP_RFFT_SCALING)" at listing line 272 appears to
// belong to the preceding fragment; the statements from listing line 288 on
// look like a separate routine (presumably
// ne10_radix4_r2c_with_twiddles_first_butterfly_c -- TODO confirm).
// Gather four real inputs spaced in_step apart.
272 #if defined(NE10_DSP_RFFT_SCALING) 288 scratch_in[0] = Fin_r[0 * in_step];
289 scratch_in[1] = Fin_r[1 * in_step];
290 scratch_in[2] = Fin_r[2 * in_step];
291 scratch_in[3] = Fin_r[3 * in_step];
// The computation of scratch_out is not part of this excerpt. Results are
// stored at offsets 0, 2*out_step - 1, 2*out_step and 4*out_step - 1.
300 Fout_r[ 0] = scratch_out[0];
301 Fout_r[ (out_step << 1) - 1] = scratch_out[1];
302 Fout_r[ (out_step << 1) ] = scratch_out[2];
303 Fout_r[2 * (out_step << 1) - 1] = scratch_out[3];
// Inverse counterpart of a first-butterfly step (presumably
// ne10_radix4_c2r_with_twiddles_first_butterfly_c -- TODO confirm).
// Read four reals from offsets 0, 2*out_step - 1, 2*out_step, 4*out_step - 1.
317 scratch_in_r[0] = Fin_r[0 ];
318 scratch_in_r[1] = Fin_r[1*(out_step<<1)-1];
319 scratch_in_r[2] = Fin_r[1*(out_step<<1) ];
320 scratch_in_r[3] = Fin_r[2*(out_step<<1)-1];
// First stage: sum/difference of inputs 0 and 3; inputs 1 and 2 are doubled.
325 scratch[0] = scratch_in_r[0] + scratch_in_r[3];
326 scratch[1] = scratch_in_r[0] - scratch_in_r[3];
327 scratch[2] = scratch_in_r[1] + scratch_in_r[1];
328 scratch[3] = scratch_in_r[2] + scratch_in_r[2];
// Second stage: combine the first-stage terms into the four outputs.
330 scratch_out_r[0] = scratch[0] + scratch[2];
331 scratch_out_r[1] = scratch[1] - scratch[3];
332 scratch_out_r[2] = scratch[0] - scratch[2];
333 scratch_out_r[3] = scratch[1] + scratch[3];
// Scatter the outputs with a stride of in_step.
338 Fout_r[0 * in_step] = scratch_out_r[0];
339 Fout_r[1 * in_step] = scratch_out_r[1];
340 Fout_r[2 * in_step] = scratch_out_r[2];
341 Fout_r[3 * in_step] = scratch_out_r[3];
// Forward butterfly with twiddles for the interior positions (presumably
// ne10_radix4_r2c_with_twiddles_other_butterfly_c -- TODO confirm).
// The loop runs (out_step >> 1) - 1 times.
357 for (m_count = (out_step >> 1) - 1; m_count; m_count --)
// Fetch three twiddle factors spaced out_step apart.
359 scratch_tw [0] = twiddles[0 * out_step];
360 scratch_tw [1] = twiddles[1 * out_step];
361 scratch_tw [2] = twiddles[2 * out_step];
// Load four complex inputs: real and imaginary parts are adjacent, and the
// four inputs are spaced in_step apart.
363 scratch_in[0].
r = Fin_r[0 * in_step ];
364 scratch_in[0].i = Fin_r[0 * in_step + 1];
365 scratch_in[1].r = Fin_r[1 * in_step ];
366 scratch_in[1].i = Fin_r[1 * in_step + 1];
367 scratch_in[2].r = Fin_r[2 * in_step ];
368 scratch_in[2].i = Fin_r[2 * in_step + 1];
369 scratch_in[3].r = Fin_r[3 * in_step ];
370 scratch_in[3].i = Fin_r[3 * in_step + 1];
// Input 0 passes through unchanged; inputs 1..3 are complex-multiplied by
// twiddles 0..2: (a.r*t.r - a.i*t.i) + i*(a.i*t.r + a.r*t.i).
375 scratch[0].
r = scratch_in[0].r;
376 scratch[0].
i = scratch_in[0].i;
377 scratch[1].
r = scratch_in[1].r * scratch_tw[0].r - scratch_in[1].i * scratch_tw[0].i;
378 scratch[1].
i = scratch_in[1].i * scratch_tw[0].r + scratch_in[1].r * scratch_tw[0].i;
380 scratch[2].
r = scratch_in[2].r * scratch_tw[1].r - scratch_in[2].i * scratch_tw[1].i;
381 scratch[2].
i = scratch_in[2].i * scratch_tw[1].r + scratch_in[2].r * scratch_tw[1].i;
383 scratch[3].
r = scratch_in[3].r * scratch_tw[2].r - scratch_in[3].i * scratch_tw[2].i;
384 scratch[3].
i = scratch_in[3].i * scratch_tw[2].r + scratch_in[3].r * scratch_tw[2].i;
// The step that derives scratch_out from scratch is not part of this excerpt.
// Outputs 0 and 1 are stored through Fout_r at offsets 0 and 2*out_step;
// outputs 2 and 3 are stored through Fout_b (set up outside this excerpt) at
// offsets 0 and -2*out_step.
391 Fout_r[ 0] = scratch_out[0].
r;
392 Fout_r[ 1] = scratch_out[0].
i;
393 Fout_r[ (out_step << 1) ] = scratch_out[1].r;
394 Fout_r[ (out_step << 1) + 1] = scratch_out[1].i;
395 Fout_b[ 0] = scratch_out[2].
r;
396 Fout_b[ 1] = scratch_out[2].
i;
397 Fout_b[- (out_step << 1) ] = scratch_out[3].r;
398 Fout_b[- (out_step << 1) + 1] = scratch_out[3].i;
// Inverse butterfly with twiddles for the interior positions (presumably
// ne10_radix4_c2r_with_twiddles_other_butterfly_c -- TODO confirm).
// The loop runs (out_step >> 1) - 1 times.
421 for (m_count = (out_step >> 1) - 1; m_count; m_count --)
// Fetch three twiddle factors spaced out_step apart.
423 scratch_tw[0] = twiddles[0 * out_step];
424 scratch_tw[1] = twiddles[1 * out_step];
425 scratch_tw[2] = twiddles[2 * out_step];
// Load four complex inputs: two through Fin_r (offsets 0 and 2*out_step) and
// two through Fin_b (offsets 0 and -2*out_step). Fin_b is set up outside this
// excerpt.
427 scratch_in[0].
r = Fin_r[0];
428 scratch_in[0].i = Fin_r[1];
430 scratch_in[1].r = Fin_b[0];
431 scratch_in[1].i = Fin_b[1];
433 scratch_in[2].r = Fin_r[(out_step<<1) + 0];
434 scratch_in[2].i = Fin_r[(out_step<<1) + 1];
436 scratch_in[3].r = Fin_b[-(out_step<<1) + 0];
437 scratch_in[3].i = Fin_b[-(out_step<<1) + 1];
// The step that derives scratch from scratch_in is not part of this excerpt.
// Term 0 passes through; terms 1..3 are multiplied by the complex conjugate
// of twiddles 0..2: (a.r*t.r + a.i*t.i) + i*(a.i*t.r - a.r*t.i).
445 scratch_out[0] = scratch[0];
447 scratch_out[1].r = scratch[1].r * scratch_tw[0].r + scratch[1].i * scratch_tw[0].i;
448 scratch_out[1].i = scratch[1].i * scratch_tw[0].r - scratch[1].r * scratch_tw[0].i;
450 scratch_out[2].r = scratch[2].r * scratch_tw[1].r + scratch[2].i * scratch_tw[1].i;
451 scratch_out[2].i = scratch[2].i * scratch_tw[1].r - scratch[2].r * scratch_tw[1].i;
453 scratch_out[3].r = scratch[3].r * scratch_tw[2].r + scratch[3].i * scratch_tw[2].i;
454 scratch_out[3].i = scratch[3].i * scratch_tw[2].r - scratch[3].r * scratch_tw[2].i;
// Store the four complex results (real then imaginary) spaced in_step apart.
459 Fout_r[0 * in_step ] = scratch_out[0].r;
460 Fout_r[0 * in_step + 1] = scratch_out[0].i;
461 Fout_r[1 * in_step ] = scratch_out[1].r;
462 Fout_r[1 * in_step + 1] = scratch_out[1].i;
463 Fout_r[2 * in_step ] = scratch_out[2].r;
464 Fout_r[2 * in_step + 1] = scratch_out[2].i;
465 Fout_r[3 * in_step ] = scratch_out[3].r;
466 Fout_r[3 * in_step + 1] = scratch_out[3].i;
// Forward last-butterfly fragment (presumably
// ne10_radix4_r2c_with_twiddles_last_butterfly_c -- TODO confirm).
// Gather four real inputs spaced in_step apart.
485 scratch_in[0] = Fin_r[0 * in_step];
486 scratch_in[1] = Fin_r[1 * in_step];
487 scratch_in[2] = Fin_r[2 * in_step];
488 scratch_in[3] = Fin_r[3 * in_step];
// The computation of scratch_out is not part of this excerpt. Results go to
// offsets 0, 1, 2*out_step and 2*out_step + 1.
496 Fout_r[ 0] = scratch_out[0];
497 Fout_r[ 1] = scratch_out[1];
498 Fout_r[ (out_step << 1) ] = scratch_out[2];
499 Fout_r[ (out_step << 1) + 1] = scratch_out[3];
// Inverse last-butterfly fragment (presumably
// ne10_radix4_c2r_with_twiddles_last_butterfly_c -- TODO confirm).
// Read four reals from offsets 0, 1, 2*out_step and 2*out_step + 1.
513 scratch_in[0] = Fin_r[ 0];
514 scratch_in[1] = Fin_r[ 1];
515 scratch_in[2] = Fin_r[ (out_step << 1) ];
516 scratch_in[3] = Fin_r[ (out_step << 1) + 1];
// The computation of scratch_out is not part of this excerpt. Results are
// scattered with a stride of in_step.
525 Fout_r[0 * in_step] = scratch_out[0];
526 Fout_r[1 * in_step] = scratch_out[1];
527 Fout_r[2 * in_step] = scratch_out[2];
528 Fout_r[3 * in_step] = scratch_out[3];
// Driver loop of a forward radix-4 stage with twiddles (presumably
// ne10_radix4_r2c_with_twiddles_c -- TODO confirm). Executed fstride times.
// Statements between the visible lines (e.g. other pointer updates) are not
// part of this excerpt.
549 for (f_count = fstride; f_count; f_count --)
// 1) first butterfly of the group.
554 ne10_radix4_r2c_with_twiddles_first_butterfly_c (Fout_r, Fin_r, out_step, in_step, tw);
// 2) the (out_step >> 1) - 1 interior butterflies.
561 ne10_radix4_r2c_with_twiddles_other_butterfly_c (Fout_r, Fin_r, out_step, in_step, tw);
// Skip past what the interior butterflies handled: one twiddle and two floats
// per butterfly.
564 tw += ( (out_step >> 1) - 1);
565 Fin_r += 2 * ( (out_step >> 1) - 1);
566 Fout_r += 2 * ( (out_step >> 1) - 1);
// 3) last butterfly of the group.
569 ne10_radix4_r2c_with_twiddles_last_butterfly_c (Fout_r, Fin_r, out_step, in_step, tw);
// Advance the output pointer by 3 * out_step for the next iteration.
574 Fout_r += 3 * out_step;
// Driver loop of an inverse radix-4 stage with twiddles (presumably
// ne10_radix4_c2r_with_twiddles_c -- TODO confirm). Executed fstride times.
// Statements between the visible lines are not part of this excerpt.
593 for (f_count = fstride; f_count; f_count --)
// 1) first butterfly of the group.
598 ne10_radix4_c2r_with_twiddles_first_butterfly_c (Fout_r, Fin_r, out_step, in_step, tw);
// 2) the (out_step >> 1) - 1 interior butterflies.
605 ne10_radix4_c2r_with_twiddles_other_butterfly_c (Fout_r, Fin_r, out_step, in_step, tw);
// Skip past what the interior butterflies handled: one twiddle and two floats
// per butterfly.
608 tw += ( (out_step >> 1) - 1);
609 Fin_r += 2 * ( (out_step >> 1) - 1);
610 Fout_r += 2 * ( (out_step >> 1) - 1);
// 3) last butterfly of the group.
613 ne10_radix4_c2r_with_twiddles_last_butterfly_c (Fout_r, Fin_r, out_step, in_step, tw);
// Here it is the input pointer that advances by 3 * out_step (the forward
// driver advances the output pointer instead).
618 Fin_r += 3 * out_step;
// Forward mixed-radix driver. The parameter list, local declarations and
// braces are not part of this excerpt.
622 NE10_INLINE void ne10_mixed_radix_r2c_butterfly_float32_c (
// Decode the factor table: factors[0] is the stage count, factors[1] the
// initial fstride; the last two entries read here give mstride and the radix
// of the stage processed first.
636 stage_count = factors[0];
637 fstride = factors[1];
638 mstride = factors[ (stage_count << 1) - 1 ];
639 radix = factors[ stage_count << 1 ];
640 nfft = radix * fstride;
// Branch on the parity of stage_count; the branch body is not visible here
// (presumably it selects which buffer receives the first stage -- TODO confirm).
644 if (stage_count % 2 == 0)
// First stage: radix-8 or radix-4 kernel without twiddles (the selecting
// condition is not visible in this excerpt).
653 ne10_radix8_r2c_c (Fout, Fin, fstride, mstride, nfft);
658 ne10_radix4_r2c_c (Fout, Fin, fstride, mstride, nfft);
// Subsequent stage(s): radix-4 with twiddles; the twiddle pointer then
// advances by 3 * mstride entries.
669 ne10_radix4_r2c_with_twiddles_c (Fout, buffer, fstride, mstride, nfft, twiddles);
670 twiddles += 3 * mstride;
// Inverse mixed-radix driver. The parameter list, local declarations and
// braces are not part of this excerpt.
676 NE10_INLINE void ne10_mixed_radix_c2r_butterfly_float32_c (
// Decode the factor table in the same way as the forward driver.
690 stage_count = factors[0];
691 fstride = factors[1];
692 mstride = factors[ (stage_count << 1) - 1 ];
693 radix = factors[ stage_count << 1 ];
694 nfft = radix * fstride;
// Branch on an odd stage count; the full branch body is not visible here.
701 if (stage_count % 2 == 1)
// The twiddle pointer is moved backwards by 3 * mstride before each
// with-twiddles stage (the forward driver moves it forwards by the same amount).
709 twiddles -= 3 * mstride;
712 ne10_radix4_c2r_with_twiddles_c (buffer, Fin, fstride, mstride, nfft, twiddles);
// Remaining with-twiddles stages while more than one stage is left; the
// update of stage_count happens in statements not shown here.
719 for (; stage_count > 1;)
721 twiddles -= 3 * mstride;
724 ne10_radix4_c2r_with_twiddles_c (Fout, buffer, fstride, mstride, nfft, twiddles);
// Final stage: radix-8 or radix-4 kernel without twiddles (the selecting
// condition is not visible in this excerpt).
736 ne10_radix8_c2r_c (Fout, buffer, fstride, mstride, nfft);
742 ne10_radix4_c2r_c (Fout, buffer, fstride, mstride, nfft);
// Fragment of a configuration set-up routine (presumably
// ne10_fft_alloc_r2c_float32 -- TODO confirm). The allocation itself and the
// loop bodies are not part of this excerpt.
// Carve sub-arrays out of storage reached through st: r_twiddles starts nfft
// elements after st->buffer, and r_factors starts nfft elements after
// r_twiddles.
789 st->r_twiddles = st->
buffer + nfft;
790 st->r_factors = (
ne10_int32_t*) (st->r_twiddles + nfft);
// r_factors_neon starts nfft/4 elements after r_twiddles_neon (whose own
// assignment is not visible here).
792 st->r_factors_neon = (
ne10_int32_t*) (st->r_twiddles_neon + nfft/4);
// Loop nests that walk tw starting at st->r_super_twiddles_neon; what they
// write is not shown in this excerpt.
820 tw = st->r_super_twiddles_neon;
821 for (i = 1; i < 4; i ++)
823 for (j = 0; j < 4; j++)
834 for (k=1; k<nfft/32; k++)
837 for (s = 1; s < 4; s++)
839 for (j = 0; j < 4; j++)
// Fragment of the forward public entry point (presumably
// ne10_fft_r2c_1d_float32_c -- TODO confirm). The call arguments are not
// part of this excerpt.
878 ne10_mixed_radix_r2c_butterfly_float32_c (
// Post-processing of the packed result: the value stored in fout[0].i is
// copied into fout[0].r, and the imaginary part of bin nfft/2 is set to zero.
887 fout[0].r = fout[0].i;
889 fout[(cfg->nfft) >> 1].i = 0.0f;
// Fragment of the inverse public entry point (presumably
// ne10_fft_c2r_1d_float32_c -- TODO confirm). Only part of the argument list
// is visible; the inverse driver is given the backward twiddle table from cfg.
922 ne10_mixed_radix_c2r_butterfly_float32_c (
926 cfg->r_twiddles_backward,
938 #endif // NE10_UNROLL_LEVEL void ne10_fft_c2r_1d_float32_c(ne10_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Specific implementation of ne10_fft_c2r_1d_float32 using plain C.
ne10_fft_r2c_cfg_float32_t ne10_fft_alloc_r2c_float32(ne10_int32_t nfft)
Creates a configuration structure for variants of ne10_fft_r2c_1d_float32 and ne10_fft_c2r_1d_float32.
#define NE10_MAXFACTORS
Structure for the floating point FFT function.
#define ne10_swap_ptr(X, Y)
ne10_int32_t ne10_factor(ne10_int32_t n, ne10_int32_t *facbuf, ne10_int32_t ne10_factor_flags)
#define NE10_FFT_BYTE_ALIGNMENT
#define NE10_PI
NE10 defines a number of macros for use in its function signatures.
ne10_fft_cpx_float32_t * ne10_fft_generate_twiddles_transposed_float32(ne10_fft_cpx_float32_t *twiddles, const ne10_int32_t *factors, const ne10_int32_t nfft)
#define NE10_FFT_R2C_CC_CC(OUT, IN)
#define NE10_FFT_C2R_RCR_4R(OUT, IN)
#define NE10_FACTOR_EIGHT_FIRST_STAGE
ne10_fft_cpx_float32_t * ne10_fft_generate_twiddles_float32(ne10_fft_cpx_float32_t *twiddles, const ne10_int32_t *factors, const ne10_int32_t nfft)
#define NE10_FFT_R2C_4R_CC(OUT, IN)
#define NE10_BYTE_ALIGNMENT(address, alignment)
void ne10_fft_r2c_1d_float32_c(ne10_fft_cpx_float32_t *fout, ne10_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Specific implementation of ne10_fft_r2c_1d_float32 using plain C.
ne10_fft_cpx_float32_t * buffer
#define NE10_FFT_C2R_CC_CC(OUT, IN)
#define NE10_FFT_R2C_4R_RCR(OUT, IN)
#define NE10_FFT_C2R_CC_4R(OUT, IN)
ne10_fft_r2c_state_float32_t * ne10_fft_r2c_cfg_float32_t