Project Ne10
An open, optimized software library for the ARM architecture.
NE10_fft_generic_float32.c
Go to the documentation of this file.
1 /*
2  * Copyright 2014-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /* license of Kiss FFT */
29 /*
30 Copyright (c) 2003-2010, Mark Borgerding
31 
32 All rights reserved.
33 
34 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
35 
36  * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
37  * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
38  * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
39 
40 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 */
42 
43 /*
44  * NE10 Library : dsp/NE10_fft_generic_float32.c
45  */
46 
47 #include "NE10_types.h"
48 #include "NE10_macros.h"
49 #include "NE10_fft.h"
51 
53 // Following are butterfly functions
55 static inline void ne10_radix_2_butterfly_float32_c (ne10_fft_cpx_float32_t *Fout,
56  const ne10_fft_cpx_float32_t *Fin,
57  const ne10_fft_cpx_float32_t *twiddles,
58  const ne10_int32_t fstride,
59  const ne10_int32_t out_step,
60  const ne10_int32_t nfft,
61  const ne10_int32_t is_first_stage,
62  const ne10_int32_t is_inverse,
63  const ne10_int32_t is_scaled)
64 {
65  ne10_fft_cpx_float32_t scratch_in[2];
66  ne10_fft_cpx_float32_t scratch_out[2];
67 
68  const ne10_int32_t in_step = nfft / 2;
69  ne10_int32_t f_count;
70  ne10_int32_t m_count;
71 
72  for (f_count = fstride; f_count > 0; f_count--)
73  {
74  for (m_count = out_step; m_count > 0; m_count--)
75  {
76  scratch_in[0] = Fin[0 * in_step];
77  scratch_in[1] = Fin[1 * in_step];
78 
79  if (is_inverse)
80  {
81  scratch_in[0].i = -scratch_in[0].i;
82  scratch_in[1].i = -scratch_in[1].i;
83  }
84 
85 #ifdef NE10_DSP_CFFT_SCALING
86  if (is_scaled && is_first_stage)
87  {
88  const ne10_float32_t one_by_nfft = 1.0 / nfft;
89 
90  scratch_in[0].r *= one_by_nfft;
91  scratch_in[0].i *= one_by_nfft;
92  scratch_in[1].r *= one_by_nfft;
93  scratch_in[1].i *= one_by_nfft;
94  }
95 #endif
96 
97  if (!is_first_stage)
98  {
99  ne10_fft_cpx_float32_t scratch_tw[1];
100  ne10_fft_cpx_float32_t scratch[2];
101 
102  scratch_tw[0] = twiddles[0 * out_step];
103 
104  FFT2_MUL_TW (scratch, scratch_in, scratch_tw);
105 
106  scratch_in[0] = scratch[0];
107  scratch_in[1] = scratch[1];
108  }
109 
110  FFT2_FCU (scratch_out, scratch_in);
111 
112  if (is_inverse)
113  {
114  scratch_out[0].i = -scratch_out[0].i;
115  scratch_out[1].i = -scratch_out[1].i;
116  }
117 
118  Fout[0 * out_step] = scratch_out[0];
119  Fout[1 * out_step] = scratch_out[1];
120 
121  Fin++;
122 
123  if (!is_first_stage)
124  {
125  Fout++;
126  twiddles++;
127  }
128  else
129  {
130  Fout += 2;
131  }
132  }
133  if (!is_first_stage)
134  {
135  twiddles -= out_step;
136  Fout += (2 - 1) * out_step;
137  }
138  }
139 }
140 
141 static inline void ne10_radix_4_butterfly_float32_c (ne10_fft_cpx_float32_t *Fout,
142  const ne10_fft_cpx_float32_t *Fin,
143  const ne10_fft_cpx_float32_t *twiddles,
144  const ne10_int32_t fstride,
145  const ne10_int32_t out_step,
146  const ne10_int32_t nfft,
147  const ne10_int32_t is_first_stage,
148  const ne10_int32_t is_inverse,
149  const ne10_int32_t is_scaled)
150 {
151  ne10_fft_cpx_float32_t scratch_in[4];
152  ne10_fft_cpx_float32_t scratch_out[4];
153 
154  const ne10_int32_t in_step = nfft / 4;
155  ne10_int32_t f_count;
156  ne10_int32_t m_count;
157 
158  for (f_count = fstride; f_count > 0; f_count--)
159  {
160  for (m_count = out_step; m_count > 0; m_count--)
161  {
162  scratch_in[0] = Fin[0 * in_step];
163  scratch_in[1] = Fin[1 * in_step];
164  scratch_in[2] = Fin[2 * in_step];
165  scratch_in[3] = Fin[3 * in_step];
166 
167  if (is_inverse)
168  {
169  scratch_in[0].i = -scratch_in[0].i;
170  scratch_in[1].i = -scratch_in[1].i;
171  scratch_in[2].i = -scratch_in[2].i;
172  scratch_in[3].i = -scratch_in[3].i;
173  }
174 
175 #ifdef NE10_DSP_CFFT_SCALING
176  if (is_scaled && is_first_stage)
177  {
178  const ne10_float32_t one_by_nfft = 1.0 / nfft;
179 
180  scratch_in[0].r *= one_by_nfft;
181  scratch_in[0].i *= one_by_nfft;
182  scratch_in[1].r *= one_by_nfft;
183  scratch_in[1].i *= one_by_nfft;
184  scratch_in[2].r *= one_by_nfft;
185  scratch_in[2].i *= one_by_nfft;
186  scratch_in[3].r *= one_by_nfft;
187  scratch_in[3].i *= one_by_nfft;
188  }
189 #endif
190 
191  if (!is_first_stage)
192  {
193  ne10_fft_cpx_float32_t scratch_tw[3];
194  ne10_fft_cpx_float32_t scratch[4];
195 
196  scratch_tw[0] = twiddles[0 * out_step];
197  scratch_tw[1] = twiddles[1 * out_step];
198  scratch_tw[2] = twiddles[2 * out_step];
199 
200  FFT4_MUL_TW (scratch, scratch_in, scratch_tw);
201 
202  scratch_in[0] = scratch[0];
203  scratch_in[1] = scratch[1];
204  scratch_in[2] = scratch[2];
205  scratch_in[3] = scratch[3];
206  }
207 
208  FFT4_FCU (scratch_out, scratch_in);
209 
210  if (is_inverse)
211  {
212  scratch_out[0].i = -scratch_out[0].i;
213  scratch_out[1].i = -scratch_out[1].i;
214  scratch_out[2].i = -scratch_out[2].i;
215  scratch_out[3].i = -scratch_out[3].i;
216  }
217 
218  Fout[0 * out_step] = scratch_out[0];
219  Fout[1 * out_step] = scratch_out[1];
220  Fout[2 * out_step] = scratch_out[2];
221  Fout[3 * out_step] = scratch_out[3];
222 
223  Fin++;
224 
225  if (!is_first_stage)
226  {
227  Fout++;
228  twiddles++;
229  }
230  else
231  {
232  Fout += 4;
233  }
234  }
235  if (!is_first_stage)
236  {
237  twiddles -= out_step;
238  Fout += (4 - 1) * out_step;
239  }
240  }
241 }
242 
243 static inline void ne10_radix_8_butterfly_float32_c (ne10_fft_cpx_float32_t *Fout,
244  const ne10_fft_cpx_float32_t *Fin,
245  const ne10_fft_cpx_float32_t *twiddles,
246  const ne10_int32_t fstride,
247  const ne10_int32_t out_step,
248  const ne10_int32_t nfft,
249  const ne10_int32_t is_first_stage,
250  const ne10_int32_t is_inverse,
251  const ne10_int32_t is_scaled)
252 {
253  assert (is_first_stage == 1);
254 
255  ne10_fft_cpx_float32_t scratch_in[8];
256  ne10_fft_cpx_float32_t scratch_out[8];
257 
258  const ne10_int32_t in_step = nfft / 8;
259  ne10_int32_t f_count;
260  ne10_int32_t m_count;
261 
262  for (f_count = fstride; f_count > 0; f_count--)
263  {
264  for (m_count = out_step; m_count > 0; m_count--)
265  {
266  scratch_in[0] = Fin[0 * in_step];
267  scratch_in[1] = Fin[1 * in_step];
268  scratch_in[2] = Fin[2 * in_step];
269  scratch_in[3] = Fin[3 * in_step];
270  scratch_in[4] = Fin[4 * in_step];
271  scratch_in[5] = Fin[5 * in_step];
272  scratch_in[6] = Fin[6 * in_step];
273  scratch_in[7] = Fin[7 * in_step];
274 
275  if (is_inverse)
276  {
277  scratch_in[0].i = -scratch_in[0].i;
278  scratch_in[1].i = -scratch_in[1].i;
279  scratch_in[2].i = -scratch_in[2].i;
280  scratch_in[3].i = -scratch_in[3].i;
281  scratch_in[4].i = -scratch_in[4].i;
282  scratch_in[5].i = -scratch_in[5].i;
283  scratch_in[6].i = -scratch_in[6].i;
284  scratch_in[7].i = -scratch_in[7].i;
285  }
286 
287 #ifdef NE10_DSP_CFFT_SCALING
288  if (is_scaled)
289  {
290  const ne10_float32_t one_by_nfft = 1.0 / nfft;
291 
292  scratch_in[0].r *= one_by_nfft;
293  scratch_in[0].i *= one_by_nfft;
294  scratch_in[1].r *= one_by_nfft;
295  scratch_in[1].i *= one_by_nfft;
296  scratch_in[2].r *= one_by_nfft;
297  scratch_in[2].i *= one_by_nfft;
298  scratch_in[3].r *= one_by_nfft;
299  scratch_in[3].i *= one_by_nfft;
300  scratch_in[4].r *= one_by_nfft;
301  scratch_in[4].i *= one_by_nfft;
302  scratch_in[5].r *= one_by_nfft;
303  scratch_in[5].i *= one_by_nfft;
304  scratch_in[6].r *= one_by_nfft;
305  scratch_in[6].i *= one_by_nfft;
306  scratch_in[7].r *= one_by_nfft;
307  scratch_in[7].i *= one_by_nfft;
308  }
309 #endif
310 
311  FFT8_FCU (scratch_out, scratch_in);
312 
313  if (is_inverse)
314  {
315  scratch_out[0].i = -scratch_out[0].i;
316  scratch_out[1].i = -scratch_out[1].i;
317  scratch_out[2].i = -scratch_out[2].i;
318  scratch_out[3].i = -scratch_out[3].i;
319  scratch_out[4].i = -scratch_out[4].i;
320  scratch_out[5].i = -scratch_out[5].i;
321  scratch_out[6].i = -scratch_out[6].i;
322  scratch_out[7].i = -scratch_out[7].i;
323  }
324 
325  Fout[0*out_step] = scratch_out[0];
326  Fout[1*out_step] = scratch_out[1];
327  Fout[2*out_step] = scratch_out[2];
328  Fout[3*out_step] = scratch_out[3];
329  Fout[4*out_step] = scratch_out[4];
330  Fout[5*out_step] = scratch_out[5];
331  Fout[6*out_step] = scratch_out[6];
332  Fout[7*out_step] = scratch_out[7];
333 
334  Fin++;
335  Fout += 8;
336  }
337  }
338 }
339 
340 static inline void ne10_radix_3_butterfly_float32_c (ne10_fft_cpx_float32_t *Fout,
341  const ne10_fft_cpx_float32_t *Fin,
342  const ne10_fft_cpx_float32_t *twiddles,
343  const ne10_int32_t fstride,
344  const ne10_int32_t out_step,
345  const ne10_int32_t nfft,
346  const ne10_int32_t is_first_stage,
347  const ne10_int32_t is_inverse,
348  const ne10_int32_t is_scaled)
349 {
350  ne10_fft_cpx_float32_t scratch_in[3];
351  ne10_fft_cpx_float32_t scratch_out[3];
352 
353  const ne10_int32_t in_step = nfft / 3;
354  ne10_int32_t f_count;
355  ne10_int32_t m_count;
356 
357  for (f_count = fstride; f_count > 0; f_count--)
358  {
359  for (m_count = out_step; m_count > 0; m_count--)
360  {
361  scratch_in[0] = Fin[0 * in_step];
362  scratch_in[1] = Fin[1 * in_step];
363  scratch_in[2] = Fin[2 * in_step];
364 
365  if (is_inverse)
366  {
367  scratch_in[0].i = -scratch_in[0].i;
368  scratch_in[1].i = -scratch_in[1].i;
369  scratch_in[2].i = -scratch_in[2].i;
370  }
371 
372 #ifdef NE10_DSP_CFFT_SCALING
373  if (is_scaled && is_first_stage)
374  {
375  const ne10_float32_t one_by_nfft = 1.0 / nfft;
376 
377  scratch_in[0].r *= one_by_nfft;
378  scratch_in[0].i *= one_by_nfft;
379  scratch_in[1].r *= one_by_nfft;
380  scratch_in[1].i *= one_by_nfft;
381  scratch_in[2].r *= one_by_nfft;
382  scratch_in[2].i *= one_by_nfft;
383  }
384 #endif
385 
386  if (!is_first_stage)
387  {
388  ne10_fft_cpx_float32_t scratch_tw[2];
389  ne10_fft_cpx_float32_t scratch[3];
390 
391  scratch_tw[0] = twiddles[0 * out_step];
392  scratch_tw[1] = twiddles[1 * out_step];
393 
394  FFT3_MUL_TW (scratch, scratch_in, scratch_tw);
395 
396  scratch_in[0] = scratch[0];
397  scratch_in[1] = scratch[1];
398  scratch_in[2] = scratch[2];
399  }
400 
401  FFT3_FCU (scratch_out, scratch_in);
402 
403  if (is_inverse)
404  {
405  scratch_out[0].i = -scratch_out[0].i;
406  scratch_out[1].i = -scratch_out[1].i;
407  scratch_out[2].i = -scratch_out[2].i;
408  }
409 
410  Fout[0 * out_step] = scratch_out[0];
411  Fout[1 * out_step] = scratch_out[1];
412  Fout[2 * out_step] = scratch_out[2];
413 
414  Fin++;
415 
416  if (!is_first_stage)
417  {
418  Fout++;
419  twiddles++;
420  }
421  else
422  {
423  Fout += 3;
424  }
425  }
426  if (!is_first_stage)
427  {
428  twiddles -= out_step;
429  Fout += (3 - 1) * out_step;
430  }
431  }
432 }
433 
434 static inline void ne10_radix_5_butterfly_float32_c (ne10_fft_cpx_float32_t *Fout,
435  const ne10_fft_cpx_float32_t *Fin,
436  const ne10_fft_cpx_float32_t *twiddles,
437  const ne10_int32_t fstride,
438  const ne10_int32_t out_step,
439  const ne10_int32_t nfft,
440  const ne10_int32_t is_first_stage,
441  const ne10_int32_t is_inverse,
442  const ne10_int32_t is_scaled)
443 {
444  ne10_fft_cpx_float32_t scratch_in[5];
445  ne10_fft_cpx_float32_t scratch_out[5];
446 
447  const ne10_int32_t in_step = nfft / 5;
448  ne10_int32_t f_count;
449  ne10_int32_t m_count;
450 
451  for (f_count = fstride; f_count > 0; f_count--)
452  {
453  for (m_count = out_step; m_count > 0; m_count--)
454  {
455  scratch_in[0] = Fin[0 * in_step];
456  scratch_in[1] = Fin[1 * in_step];
457  scratch_in[2] = Fin[2 * in_step];
458  scratch_in[3] = Fin[3 * in_step];
459  scratch_in[4] = Fin[4 * in_step];
460 
461  if (is_inverse)
462  {
463  scratch_in[0].i = -scratch_in[0].i;
464  scratch_in[1].i = -scratch_in[1].i;
465  scratch_in[2].i = -scratch_in[2].i;
466  scratch_in[3].i = -scratch_in[3].i;
467  scratch_in[4].i = -scratch_in[4].i;
468  }
469 
470 #ifdef NE10_DSP_CFFT_SCALING
471  if (is_scaled && is_first_stage)
472  {
473  const ne10_float32_t one_by_nfft = 1.0 / nfft;
474 
475  scratch_in[0].r *= one_by_nfft;
476  scratch_in[0].i *= one_by_nfft;
477  scratch_in[1].r *= one_by_nfft;
478  scratch_in[1].i *= one_by_nfft;
479  scratch_in[2].r *= one_by_nfft;
480  scratch_in[2].i *= one_by_nfft;
481  scratch_in[3].r *= one_by_nfft;
482  scratch_in[3].i *= one_by_nfft;
483  scratch_in[4].r *= one_by_nfft;
484  scratch_in[4].i *= one_by_nfft;
485  }
486 #endif
487 
488  if (!is_first_stage)
489  {
490  ne10_fft_cpx_float32_t scratch_tw[4];
491  ne10_fft_cpx_float32_t scratch[5];
492 
493  scratch_tw[0] = twiddles[0 * out_step];
494  scratch_tw[1] = twiddles[1 * out_step];
495  scratch_tw[2] = twiddles[2 * out_step];
496  scratch_tw[3] = twiddles[3 * out_step];
497 
498  FFT5_MUL_TW (scratch, scratch_in, scratch_tw);
499 
500  scratch_in[0] = scratch[0];
501  scratch_in[1] = scratch[1];
502  scratch_in[2] = scratch[2];
503  scratch_in[3] = scratch[3];
504  scratch_in[4] = scratch[4];
505  }
506 
507  FFT5_FCU (scratch_out, scratch_in);
508 
509  if (is_inverse)
510  {
511  scratch_out[0].i = -scratch_out[0].i;
512  scratch_out[1].i = -scratch_out[1].i;
513  scratch_out[2].i = -scratch_out[2].i;
514  scratch_out[3].i = -scratch_out[3].i;
515  scratch_out[4].i = -scratch_out[4].i;
516  }
517 
518  Fout[0 * out_step] = scratch_out[0];
519  Fout[1 * out_step] = scratch_out[1];
520  Fout[2 * out_step] = scratch_out[2];
521  Fout[3 * out_step] = scratch_out[3];
522  Fout[4 * out_step] = scratch_out[4];
523 
524  Fin++;
525 
526  if (!is_first_stage)
527  {
528  Fout++;
529  twiddles++;
530  }
531  else
532  {
533  Fout += 5;
534  }
535  }
536  if (!is_first_stage)
537  {
538  twiddles -= out_step;
539  Fout += (5 - 1) * out_step;
540  }
541  }
542 }
543 
544 static inline void ne10_radix_generic_butterfly_float32_c (ne10_fft_cpx_float32_t *Fout,
545  const ne10_fft_cpx_float32_t *Fin,
546  const ne10_fft_cpx_float32_t *twiddles,
547  const ne10_int32_t radix,
548  const ne10_int32_t in_step,
549  const ne10_int32_t out_step,
550  const ne10_int32_t is_inverse,
551  const ne10_int32_t is_scaled)
552 {
553  ne10_int32_t q, q1;
554  ne10_int32_t f_count = in_step;
555 
557  ne10_fft_cpx_float32_t *scratch;
558  scratch = (ne10_fft_cpx_float32_t *) NE10_MALLOC (radix *
559  sizeof (ne10_fft_cpx_float32_t));
560 
561  for (; f_count > 0; f_count--)
562  {
563  // load
564  for (q1 = 0; q1 < radix; q1++)
565  {
566  scratch[q1] = Fin[in_step * q1];
567  if (is_inverse)
568  {
569  scratch[q1].i = -scratch[q1].i;
570 #ifdef NE10_DSP_CFFT_SCALING
571  if (is_scaled)
572  {
573  const ne10_float32_t one_by_nfft = 1.0 / (radix * in_step);
574  scratch[q1].r *= one_by_nfft;
575  scratch[q1].i *= one_by_nfft;
576  }
577 #endif
578  }
579  } // q1
580 
581  // compute Fout[q1 * out_step] from definition
582  for (q1 = 0; q1 < radix; q1++)
583  {
584  ne10_int32_t twidx = 0;
585  Fout[q1 * out_step] = scratch[0];
586  for (q = 1; q < radix; q++)
587  {
588  twidx += 1 * q1;
589  if (twidx >= radix)
590  {
591  twidx -= radix;
592  }
593  NE10_CPX_MUL_F32 (tmp, scratch[q], twiddles[twidx]);
594  NE10_CPX_ADDTO (Fout[q1 * out_step], tmp);
595  } // q
596  if (is_inverse)
597  {
598  Fout[q1 * out_step].i = -Fout[q1 * out_step].i;
599  }
600  } // q1
601 
602  Fout += radix;
603  Fin++;
604  }
605 
606  NE10_FREE (scratch);
607 }
608 
609 static inline void ne10_mixed_radix_generic_butterfly_float32_impl_c (ne10_fft_cpx_float32_t *Fout,
610  const ne10_fft_cpx_float32_t *Fin,
611  const ne10_int32_t *factors,
612  const ne10_fft_cpx_float32_t *twiddles,
613  ne10_fft_cpx_float32_t *buffer,
614  const ne10_int32_t is_inverse,
615  const ne10_int32_t is_scaled)
616 {
617  ne10_int32_t fstride, mstride, radix;
618  ne10_int32_t stage_count;
619  ne10_int32_t nfft;
620 
621  // init fstride, mstride, radix, nfft
622  stage_count = factors[0];
623  fstride = factors[1];
624  mstride = 1;
625  radix = factors[stage_count << 1]; // radix of first stage
626  nfft = fstride * radix;
627 
628  if (stage_count % 2 == 0)
629  {
630  ne10_swap_ptr (buffer, Fout);
631  }
632 
633  // first stage
634  switch (radix)
635  {
636  case 2:
637  ne10_radix_2_butterfly_float32_c (Fout, Fin, NULL, fstride, 1, nfft, 1,
638  is_inverse, is_scaled);
639  break;
640  case 4:
641  ne10_radix_4_butterfly_float32_c (Fout, Fin, NULL, fstride, 1, nfft, 1,
642  is_inverse, is_scaled);
643  break;
644  case 3:
645  ne10_radix_3_butterfly_float32_c (Fout, Fin, NULL, fstride, 1, nfft, 1,
646  is_inverse, is_scaled);
647  break;
648  case 5:
649  ne10_radix_5_butterfly_float32_c (Fout, Fin, NULL, fstride, 1, nfft, 1,
650  is_inverse, is_scaled);
651  break;
652  case 8:
653  ne10_radix_8_butterfly_float32_c (Fout, Fin, NULL, fstride, 1, nfft, 1,
654  is_inverse, is_scaled);
655  default:
656  ne10_radix_generic_butterfly_float32_c (Fout, Fin, twiddles, radix,
657  fstride, 1, is_inverse, is_scaled);
658  break;
659  }
660 
661  stage_count--;
662  if (!stage_count) // finish
663  {
664  return;
665  }
666 
667  if (radix % 2)
668  {
669  twiddles += radix;
670  }
671 
672  // other stges
673  while (stage_count > 0)
674  {
675  ne10_swap_ptr (buffer, Fout);
676  mstride *= radix;
677 
678  // update radix
679  radix = factors[stage_count << 1];
680  assert ((radix > 1) && (radix < 6));
681 
682  fstride /= radix;
683  switch (radix)
684  {
685  case 2:
686  ne10_radix_2_butterfly_float32_c (Fout, buffer, twiddles, fstride,
687  mstride, nfft, 0, is_inverse,
688  0); // Only scaling in the first stage.
689  break;
690  case 3:
691  ne10_radix_3_butterfly_float32_c (Fout, buffer, twiddles, fstride,
692  mstride, nfft, 0, is_inverse,
693  0); // Only scaling in the first stage.
694  break;
695  case 4:
696  ne10_radix_4_butterfly_float32_c (Fout, buffer, twiddles, fstride,
697  mstride, nfft, 0, is_inverse,
698  0); // Only scaling in the first stage.
699  break;
700  case 5:
701  ne10_radix_5_butterfly_float32_c (Fout, buffer, twiddles, fstride,
702  mstride, nfft, 0, is_inverse,
703  0); // Only scaling in the first stage.
704  break;
705  } // switch (radix)
706 
707  twiddles += mstride * (radix - 1);
708 
709  stage_count--;
710  } // while (stage_count)
711 }
712 
714  const ne10_fft_cpx_float32_t *Fin,
715  const ne10_int32_t *factors,
716  const ne10_fft_cpx_float32_t *twiddles,
717  ne10_fft_cpx_float32_t *buffer,
718  const ne10_int32_t is_scaled)
719 {
720  if (is_scaled)
721  {
722  ne10_mixed_radix_generic_butterfly_float32_impl_c (Fout, Fin, factors,
723  twiddles, buffer, 0,
724  1); // Scaling.
725  }
726  else
727  {
728  ne10_mixed_radix_generic_butterfly_float32_impl_c (Fout, Fin, factors,
729  twiddles, buffer, 0,
730  0); // Unscaling.
731  }
732 }
733 
735  const ne10_fft_cpx_float32_t *Fin,
736  const ne10_int32_t *factors,
737  const ne10_fft_cpx_float32_t *twiddles,
738  ne10_fft_cpx_float32_t *buffer,
739  const ne10_int32_t is_scaled)
740 {
741  if (is_scaled)
742  {
743  ne10_mixed_radix_generic_butterfly_float32_impl_c (Fout, Fin, factors,
744  twiddles, buffer, 1,
745  1); // Scaling
746  }
747  else
748  {
749  ne10_mixed_radix_generic_butterfly_float32_impl_c (Fout, Fin, factors,
750  twiddles, buffer, 1,
751  0); // unscaling
752  }
753 }
void ne10_mixed_radix_generic_butterfly_float32_c(ne10_fft_cpx_float32_t *Fout, const ne10_fft_cpx_float32_t *Fin, const ne10_int32_t *factors, const ne10_fft_cpx_float32_t *twiddles, ne10_fft_cpx_float32_t *buffer, const ne10_int32_t is_scaled)
int32_t ne10_int32_t
Definition: NE10_types.h:76
#define ne10_swap_ptr(X, Y)
float ne10_float32_t
Definition: NE10_types.h:80
#define NE10_CPX_ADDTO(Z, X)
void ne10_mixed_radix_generic_butterfly_inverse_float32_c(ne10_fft_cpx_float32_t *Fout, const ne10_fft_cpx_float32_t *Fin, const ne10_int32_t *factors, const ne10_fft_cpx_float32_t *twiddles, ne10_fft_cpx_float32_t *buffer, const ne10_int32_t is_scaled)
#define NE10_FREE(p)
Definition: NE10_macros.h:54
#define NE10_MALLOC
Definition: NE10_macros.h:53
#define NE10_CPX_MUL_F32(Z, A, B)
ne10_float32_t i
Definition: NE10_types.h:233
ne10_float32_t r
Definition: NE10_types.h:232