Project Ne10
An open, optimized software library for the ARM architecture.
NE10_fft_float32.c
1 /*
2  * Copyright 2013-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /* license of Kiss FFT */
29 /*
30 Copyright (c) 2003-2010, Mark Borgerding
31 
32 All rights reserved.
33 
34 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
35 
36  * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
37  * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
38  * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
39 
40 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 */
42 
43 /*
44  * NE10 Library : dsp/NE10_fft_float32.c
45  */
46 
47 #include "NE10_types.h"
48 #include "NE10_macros.h"
49 #include "NE10_fft.h"
50 
51 /*
52  * This function calculates the FFT for power-of-two input sizes using an ordered, mixed
53  * radix-4/8 DIT algorithm.
54  *
55  * At each stage, 'fstride' holds the number of butterfly "sections" to be processed,
56  * while 'mstride' holds the number of butterflies to be performed in each section. After
57  * radix-4 butterflies, for example, we quarter the 'fstride' (number of sections) and
58  * quadruple the 'mstride' (size of each section) for the next stage. The exception to
59  * this is the first stage, in which 'mstride' does not apply (as it is implicitly 1).
60  *
61  * The algorithm first performs either a radix-8 or radix-4 pass, depending on the size
62  * of the input/output (as dictated by the 'factors' array), and then continually applies
63  * radix-4 butterflies to completion. The order in which results are stored after each
64  * stage allows stages to load and store elements contiguously between iterations, and for
65  * the final output order to be correct.
66  */
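/*
 * A worked example of this bookkeeping (illustrative only, not part of the original
 * sources), assuming nfft = 64 = 4 * 4 * 4:
 *
 *   Stage 1 (radix-4): fstride = 16 sections, mstride implicitly 1
 *   Stage 2 (radix-4): fstride = 4 sections,  mstride = 4,  step = 16
 *   Stage 3 (radix-4): fstride = 1 section,   mstride = 16, step = 16
 *
 * After each stage 'fstride' is divided by 4 and 'mstride' is multiplied by 4, so the
 * product fstride * mstride * 4 remains nfft throughout.
 */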
67 static void ne10_mixed_radix_butterfly_float32_c (ne10_fft_cpx_float32_t *out,
68  ne10_fft_cpx_float32_t *in,
69  ne10_int32_t *factors,
70  ne10_fft_cpx_float32_t *twiddles,
71  ne10_fft_cpx_float32_t *buffer)
72 {
73  ne10_int32_t stage_count = factors[0];
74  ne10_int32_t fstride = factors[1];
75  ne10_int32_t mstride = factors[(stage_count << 1) - 1];
76  ne10_int32_t first_radix = factors[stage_count << 1];
77  ne10_int32_t step, f_count, m_count;
78  ne10_fft_cpx_float32_t *src = in;
79  ne10_fft_cpx_float32_t *dst = out;
80  ne10_fft_cpx_float32_t *out_final = out;
81  ne10_fft_cpx_float32_t *tw, *tmp;
82  const ne10_float32_t TW_81 = 0.70710678;
83 
84  ne10_fft_cpx_float32_t scratch[16];
85  ne10_fft_cpx_float32_t scratch_in[8];
86  ne10_fft_cpx_float32_t scratch_out[8];
87  ne10_fft_cpx_float32_t scratch_tw[6];
88 
89  // The first stage (using hardcoded twiddles)
90  if (first_radix == 8) // For our factoring this means nfft is of form 2^{odd}
91  {
92  for (f_count = 0; f_count < fstride; f_count++)
93  {
94  dst = &out[f_count * 8];
95 
96  // X[0] +/- X[4N/8]
97  scratch_in[0].r = src[0].r + src[fstride * 4].r;
98  scratch_in[0].i = src[0].i + src[fstride * 4].i;
99  scratch_in[1].r = src[0].r - src[fstride * 4].r;
100  scratch_in[1].i = src[0].i - src[fstride * 4].i;
101 
102  // X[N/8] +/- X[5N/8]
103  scratch_in[2].r = src[fstride].r + src[fstride * 5].r;
104  scratch_in[2].i = src[fstride].i + src[fstride * 5].i;
105  scratch_in[3].r = src[fstride].r - src[fstride * 5].r;
106  scratch_in[3].i = src[fstride].i - src[fstride * 5].i;
107 
108  // X[2N/8] +/- X[6N/8]
109  scratch_in[4].r = src[fstride * 2].r + src[fstride * 6].r;
110  scratch_in[4].i = src[fstride * 2].i + src[fstride * 6].i;
111  scratch_in[5].r = src[fstride * 2].r - src[fstride * 6].r;
112  scratch_in[5].i = src[fstride * 2].i - src[fstride * 6].i;
113 
114  // X[3N/8] +/- X[7N/8]
115  scratch_in[6].r = src[fstride * 3].r + src[fstride * 7].r;
116  scratch_in[6].i = src[fstride * 3].i + src[fstride * 7].i;
117  scratch_in[7].r = src[fstride * 3].r - src[fstride * 7].r;
118  scratch_in[7].i = src[fstride * 3].i - src[fstride * 7].i;
119 
120 
121  scratch[0] = scratch_in[0]; // X[0] + X[4N/8]
122  scratch[1] = scratch_in[1]; // X[0] - X[4N/8]
123  scratch[2] = scratch_in[2]; // X[N/8] + X[5N/8]
124  scratch[4] = scratch_in[4]; // X[2N/8] + X[6N/8]
125  scratch[6] = scratch_in[6]; // X[3N/8] + X[7N/8]
126 
127  // (X[2N/8] - X[6N/8]) * -i
128  scratch[5].r = scratch_in[5].i;
129  scratch[5].i = -scratch_in[5].r;
130 
131  // (X[N/8] - X[5N/8]) * (TW_81 - TW_81i)
132  scratch[3].r = (scratch_in[3].r + scratch_in[3].i) * TW_81;
133  scratch[3].i = (scratch_in[3].i - scratch_in[3].r) * TW_81;
134 
135  // (X[3N/8] - X[7N/8]) * (TW_81 + TW_81i)
136  scratch[7].r = (scratch_in[7].r - scratch_in[7].i) * TW_81;
137  scratch[7].i = (scratch_in[7].i + scratch_in[7].r) * TW_81;
138 
139  // Combine the (X[0] +/- X[4N/8]) and (X[2N/8] +/- X[6N/8]) components
140  scratch[8].r = scratch[0].r + scratch[4].r;
141  scratch[8].i = scratch[0].i + scratch[4].i;
142  scratch[9].r = scratch[1].r + scratch[5].r;
143  scratch[9].i = scratch[1].i + scratch[5].i;
144  scratch[10].r = scratch[0].r - scratch[4].r;
145  scratch[10].i = scratch[0].i - scratch[4].i;
146  scratch[11].r = scratch[1].r - scratch[5].r;
147  scratch[11].i = scratch[1].i - scratch[5].i;
148 
149  // Combine the (X[N/8] +/- X[5N/8]) and (X[3N/8] +/- X[7N/8]) components
150  scratch[12].r = scratch[2].r + scratch[6].r;
151  scratch[12].i = scratch[2].i + scratch[6].i;
152  scratch[13].r = scratch[3].r - scratch[7].r;
153  scratch[13].i = scratch[3].i - scratch[7].i;
154  scratch[14].r = scratch[2].r - scratch[6].r;
155  scratch[14].i = scratch[2].i - scratch[6].i;
156  scratch[15].r = scratch[3].r + scratch[7].r;
157  scratch[15].i = scratch[3].i + scratch[7].i;
158 
159  // Combine the two combined components (for the full radix-8 butterfly)
160  scratch_out[0].r = scratch[8].r + scratch[12].r;
161  scratch_out[0].i = scratch[8].i + scratch[12].i;
162  scratch_out[1].r = scratch[9].r + scratch[13].r;
163  scratch_out[1].i = scratch[9].i + scratch[13].i;
164  scratch_out[2].r = scratch[10].r + scratch[14].i;
165  scratch_out[2].i = scratch[10].i - scratch[14].r;
166  scratch_out[3].r = scratch[11].r + scratch[15].i;
167  scratch_out[3].i = scratch[11].i - scratch[15].r;
168  scratch_out[4].r = scratch[8].r - scratch[12].r;
169  scratch_out[4].i = scratch[8].i - scratch[12].i;
170  scratch_out[5].r = scratch[9].r - scratch[13].r;
171  scratch_out[5].i = scratch[9].i - scratch[13].i;
172  scratch_out[6].r = scratch[10].r - scratch[14].i;
173  scratch_out[6].i = scratch[10].i + scratch[14].r;
174  scratch_out[7].r = scratch[11].r - scratch[15].i;
175  scratch_out[7].i = scratch[11].i + scratch[15].r;
176 
177  // Store the results
178  dst[0] = scratch_out[0];
179  dst[1] = scratch_out[1];
180  dst[2] = scratch_out[2];
181  dst[3] = scratch_out[3];
182  dst[4] = scratch_out[4];
183  dst[5] = scratch_out[5];
184  dst[6] = scratch_out[6];
185  dst[7] = scratch_out[7];
186 
187  src++;
188  } // f_count
189 
190  // Update variables for the next stages
191  step = fstride << 1; // For C2C, 1/4 of input size (fstride is nfft/8)
192  stage_count--;
193  fstride /= 4;
194  }
195  else if (first_radix == 4) // For our factoring this means nfft is of form 2^{even} >= 4
196  {
197  for (f_count = fstride; f_count; f_count--)
198  {
199  // Load the four input values for a radix-4 butterfly
200  scratch_in[0] = src[0]; // X[0]
201  scratch_in[1] = src[fstride * 1]; // X[N/4]
202  scratch_in[2] = src[fstride * 2]; // X[2N/4]
203  scratch_in[3] = src[fstride * 3]; // X[3N/4]
204 
205  // X[0] +/- X[2N/4]
206  scratch[0].r = scratch_in[0].r + scratch_in[2].r;
207  scratch[0].i = scratch_in[0].i + scratch_in[2].i;
208  scratch[1].r = scratch_in[0].r - scratch_in[2].r;
209  scratch[1].i = scratch_in[0].i - scratch_in[2].i;
210 
211  // X[N/4] +/- X[3N/4]
212  scratch[2].r = scratch_in[1].r + scratch_in[3].r;
213  scratch[2].i = scratch_in[1].i + scratch_in[3].i;
214  scratch[3].r = scratch_in[1].r - scratch_in[3].r;
215  scratch[3].i = scratch_in[1].i - scratch_in[3].i;
216 
217  // Combine the (X[0] +/- X[2N/4]) and (X[N/4] +/- X[3N/4]) components (for
218  // the full radix-4 butterfly)
219  scratch_out[0].r = scratch[0].r + scratch[2].r;
220  scratch_out[0].i = scratch[0].i + scratch[2].i;
221  scratch_out[1].r = scratch[1].r + scratch[3].i; // scratch[1] - i*scratch[3]
222  scratch_out[1].i = scratch[1].i - scratch[3].r;
223  scratch_out[2].r = scratch[0].r - scratch[2].r;
224  scratch_out[2].i = scratch[0].i - scratch[2].i;
225  scratch_out[3].r = scratch[1].r - scratch[3].i; // scratch[1] + i*scratch[3]
226  scratch_out[3].i = scratch[1].i + scratch[3].r;
227 
228  // Store the results
229  *dst++ = scratch_out[0];
230  *dst++ = scratch_out[1];
231  *dst++ = scratch_out[2];
232  *dst++ = scratch_out[3];
233 
234  src++;
235  } // f_count
236 
237  // Update variables for the next stages
238  step = fstride; // For C2C, 1/4 of input size (fstride is nfft/4)
239  stage_count--;
240  fstride /= 4;
241  }
242  else if (first_radix == 2) // nfft = 2
243  {
244  dst[0].r = src[0].r + src[1].r;
245  dst[0].i = src[0].i + src[1].i;
246  dst[1].r = src[0].r - src[1].r;
247  dst[1].i = src[0].i - src[1].i;
248  return;
249  }
250  else // nfft = 1
251  {
252  dst[0] = src[0];
253  return;
254  }
255 
256  // The next stage should read the output of the first stage as input
257  in = out;
258  out = buffer;
259 
260  // Middle stages (after the first, excluding the last)
261  for (; stage_count > 1; stage_count--)
262  {
263  src = in;
264  for (f_count = 0; f_count < fstride; f_count++)
265  {
266  dst = &out[f_count * (mstride * 4)];
267  tw = twiddles; // Reset the twiddle pointer for the next section
268  for (m_count = mstride; m_count; m_count--)
269  {
270  // Load the three twiddles and four input values for a radix-4 butterfly
271  scratch_tw[0] = tw[0]; // w^{k}
272  scratch_tw[1] = tw[mstride * 1]; // w^{2k}
273  scratch_tw[2] = tw[mstride * 2]; // w^{3k}
274  scratch_in[0] = src[0];
275  scratch_in[1] = src[step * 1];
276  scratch_in[2] = src[step * 2];
277  scratch_in[3] = src[step * 3];
278 
279  // Multiply input elements by their associated twiddles
280  scratch[0] = scratch_in[0];
281  scratch[1].r = scratch_in[1].r * scratch_tw[0].r - scratch_in[1].i * scratch_tw[0].i;
282  scratch[1].i = scratch_in[1].i * scratch_tw[0].r + scratch_in[1].r * scratch_tw[0].i;
283  scratch[2].r = scratch_in[2].r * scratch_tw[1].r - scratch_in[2].i * scratch_tw[1].i;
284  scratch[2].i = scratch_in[2].i * scratch_tw[1].r + scratch_in[2].r * scratch_tw[1].i;
285  scratch[3].r = scratch_in[3].r * scratch_tw[2].r - scratch_in[3].i * scratch_tw[2].i;
286  scratch[3].i = scratch_in[3].i * scratch_tw[2].r + scratch_in[3].r * scratch_tw[2].i;
287 
288  // X[0] +/- X[2N/4]
289  scratch[4].r = scratch[0].r + scratch[2].r;
290  scratch[4].i = scratch[0].i + scratch[2].i;
291  scratch[5].r = scratch[0].r - scratch[2].r;
292  scratch[5].i = scratch[0].i - scratch[2].i;
293 
294  // X[N/4] +/- X[3N/4]
295  scratch[6].r = scratch[1].r + scratch[3].r;
296  scratch[6].i = scratch[1].i + scratch[3].i;
297  scratch[7].r = scratch[1].r - scratch[3].r;
298  scratch[7].i = scratch[1].i - scratch[3].i;
299 
300  // Combine the (X[0] +/- X[2N/4]) and (X[N/4] +/- X[3N/4]) components (for
301  // the full radix-4 butterfly)
302  scratch_out[0].r = scratch[4].r + scratch[6].r;
303  scratch_out[0].i = scratch[4].i + scratch[6].i;
304  scratch_out[1].r = scratch[5].r + scratch[7].i;
305  scratch_out[1].i = scratch[5].i - scratch[7].r;
306  scratch_out[2].r = scratch[4].r - scratch[6].r;
307  scratch_out[2].i = scratch[4].i - scratch[6].i;
308  scratch_out[3].r = scratch[5].r - scratch[7].i;
309  scratch_out[3].i = scratch[5].i + scratch[7].r;
310 
311  // Store the results
312  dst[0] = scratch_out[0];
313  dst[mstride * 1] = scratch_out[1];
314  dst[mstride * 2] = scratch_out[2];
315  dst[mstride * 3] = scratch_out[3];
316 
317  tw++;
318  src++;
319  dst++;
320  } // m_count
321  } // f_count
322 
323  // Update variables for the next stages
324  twiddles += mstride * 3;
325  mstride *= 4;
326  fstride /= 4;
327 
328  // Swap the input and output buffers for the next stage
329  tmp = in;
330  in = out;
331  out = tmp;
332  } // stage_count
333 
334  // The last stage
335  if (stage_count)
336  {
337  src = in;
338 
339  // Always write to the final output buffer (if necessary, we can calculate this
340  // in-place as the final stage reads and writes at the same offsets)
341  dst = out_final;
342 
343  for (f_count = fstride; f_count; f_count--) // Note: for C2C, fstride = 1
344  {
345  tw = twiddles; // Reset the twiddle pointer for the next section
346  for (m_count = mstride; m_count; m_count--)
347  {
348  // Load the three twiddles and four input values for a radix-4 butterfly
349  scratch_tw[0] = tw[0];
350  scratch_tw[1] = tw[mstride * 1];
351  scratch_tw[2] = tw[mstride * 2];
352  scratch_in[0] = src[0];
353  scratch_in[1] = src[step * 1];
354  scratch_in[2] = src[step * 2];
355  scratch_in[3] = src[step * 3];
356 
357  // Multiply input elements by their associated twiddles
358  scratch[0] = scratch_in[0];
359  scratch[1].r = scratch_in[1].r * scratch_tw[0].r - scratch_in[1].i * scratch_tw[0].i;
360  scratch[1].i = scratch_in[1].i * scratch_tw[0].r + scratch_in[1].r * scratch_tw[0].i;
361  scratch[2].r = scratch_in[2].r * scratch_tw[1].r - scratch_in[2].i * scratch_tw[1].i;
362  scratch[2].i = scratch_in[2].i * scratch_tw[1].r + scratch_in[2].r * scratch_tw[1].i;
363  scratch[3].r = scratch_in[3].r * scratch_tw[2].r - scratch_in[3].i * scratch_tw[2].i;
364  scratch[3].i = scratch_in[3].i * scratch_tw[2].r + scratch_in[3].r * scratch_tw[2].i;
365 
366  // X[0] +/- X[2N/4]
367  scratch[4].r = scratch[0].r + scratch[2].r;
368  scratch[4].i = scratch[0].i + scratch[2].i;
369  scratch[5].r = scratch[0].r - scratch[2].r;
370  scratch[5].i = scratch[0].i - scratch[2].i;
371 
372  // X[N/4] +/- X[3N/4]
373  scratch[6].r = scratch[1].r + scratch[3].r;
374  scratch[6].i = scratch[1].i + scratch[3].i;
375  scratch[7].r = scratch[1].r - scratch[3].r;
376  scratch[7].i = scratch[1].i - scratch[3].i;
377 
378  // Combine the (X[0] +/- X[2N/4]) and (X[N/4] +/- X[3N/4]) components (for
379  // the full radix-4 butterfly)
380  scratch_out[0].r = scratch[4].r + scratch[6].r;
381  scratch_out[0].i = scratch[4].i + scratch[6].i;
382  scratch_out[1].r = scratch[5].r + scratch[7].i;
383  scratch_out[1].i = scratch[5].i - scratch[7].r;
384  scratch_out[2].r = scratch[4].r - scratch[6].r;
385  scratch_out[2].i = scratch[4].i - scratch[6].i;
386  scratch_out[3].r = scratch[5].r - scratch[7].i;
387  scratch_out[3].i = scratch[5].i + scratch[7].r;
388 
389  // Store the results
390  dst[0] = scratch_out[0];
391  dst[step * 1] = scratch_out[1];
392  dst[step * 2] = scratch_out[2];
393  dst[step * 3] = scratch_out[3];
394 
395  tw++;
396  src++;
397  dst++;
398  } // m_count
399  } // f_count
400  } // last stage
401 }
402 
403 /*
404  * This function calculates the inverse FFT, and is very similar in structure to its
405  * complement "ne10_mixed_radix_butterfly_float32_c".
406  */
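/*
 * Concretely, the differences are that the twiddle multiplications use conjugated
 * twiddles and the +/-i rotations inside the butterflies (including the hardcoded
 * first-stage ones) are mirrored, and that the results are scaled by 1/nfft -- after
 * the first stage when it is also the last, or in the final stage otherwise.
 */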
407 static void ne10_mixed_radix_butterfly_inverse_float32_c (ne10_fft_cpx_float32_t *out,
408  ne10_fft_cpx_float32_t *in,
409  ne10_int32_t *factors,
410  ne10_fft_cpx_float32_t *twiddles,
411  ne10_fft_cpx_float32_t *buffer)
412 {
413  ne10_int32_t stage_count = factors[0];
414  ne10_int32_t fstride = factors[1];
415  ne10_int32_t mstride = factors[(stage_count << 1) - 1];
416  ne10_int32_t first_radix = factors[stage_count << 1];
417  ne10_float32_t one_by_nfft = (1.0f / (ne10_float32_t) (fstride * first_radix));
418  ne10_int32_t step, f_count, m_count;
419  ne10_fft_cpx_float32_t *src = in;
420  ne10_fft_cpx_float32_t *dst = out;
421  ne10_fft_cpx_float32_t *out_final = out;
422  ne10_fft_cpx_float32_t *tw, *tmp;
423  const ne10_float32_t TW_81 = 0.70710678;
424 
425  ne10_fft_cpx_float32_t scratch[16];
426  ne10_fft_cpx_float32_t scratch_in[8];
427  ne10_fft_cpx_float32_t scratch_out[8];
428  ne10_fft_cpx_float32_t scratch_tw[6];
429 
430  // The first stage (using hardcoded twiddles)
431  if (first_radix == 8) // nfft is of form 2^{odd}
432  {
433  for (f_count = 0; f_count < fstride; f_count++)
434  {
435  dst = &out[f_count * 8];
436 
437  // Prepare sums for the butterfly calculations
438  scratch_in[0].r = src[0].r + src[fstride * 4].r;
439  scratch_in[0].i = src[0].i + src[fstride * 4].i;
440  scratch_in[1].r = src[0].r - src[fstride * 4].r;
441  scratch_in[1].i = src[0].i - src[fstride * 4].i;
442  scratch_in[2].r = src[fstride].r + src[fstride * 5].r;
443  scratch_in[2].i = src[fstride].i + src[fstride * 5].i;
444  scratch_in[3].r = src[fstride].r - src[fstride * 5].r;
445  scratch_in[3].i = src[fstride].i - src[fstride * 5].i;
446  scratch_in[4].r = src[fstride * 2].r + src[fstride * 6].r;
447  scratch_in[4].i = src[fstride * 2].i + src[fstride * 6].i;
448  scratch_in[5].r = src[fstride * 2].r - src[fstride * 6].r;
449  scratch_in[5].i = src[fstride * 2].i - src[fstride * 6].i;
450  scratch_in[6].r = src[fstride * 3].r + src[fstride * 7].r;
451  scratch_in[6].i = src[fstride * 3].i + src[fstride * 7].i;
452  scratch_in[7].r = src[fstride * 3].r - src[fstride * 7].r;
453  scratch_in[7].i = src[fstride * 3].i - src[fstride * 7].i;
454 
455  // Multiply some of these by hardcoded radix-8 twiddles
456  scratch[0] = scratch_in[0];
457  scratch[1] = scratch_in[1];
458  scratch[2] = scratch_in[2];
459  scratch[3].r = (scratch_in[3].r - scratch_in[3].i) * TW_81;
460  scratch[3].i = (scratch_in[3].i + scratch_in[3].r) * TW_81;
461  scratch[4] = scratch_in[4];
462  scratch[5].r = -scratch_in[5].i;
463  scratch[5].i = scratch_in[5].r;
464  scratch[6].r = scratch_in[6].r;
465  scratch[6].i = scratch_in[6].i;
466  scratch[7].r = (scratch_in[7].r + scratch_in[7].i) * TW_81;
467  scratch[7].i = (scratch_in[7].i - scratch_in[7].r) * TW_81;
468 
469  // Combine the first set of pairs of these sums
470  scratch[8].r = scratch[0].r + scratch[4].r;
471  scratch[8].i = scratch[0].i + scratch[4].i;
472  scratch[9].r = scratch[1].r + scratch[5].r;
473  scratch[9].i = scratch[1].i + scratch[5].i;
474  scratch[10].r = scratch[0].r - scratch[4].r;
475  scratch[10].i = scratch[0].i - scratch[4].i;
476  scratch[11].r = scratch[1].r - scratch[5].r;
477  scratch[11].i = scratch[1].i - scratch[5].i;
478 
479  // Combine the second set of pairs of these sums
480  scratch[12].r = scratch[2].r + scratch[6].r;
481  scratch[12].i = scratch[2].i + scratch[6].i;
482  scratch[13].r = scratch[3].r - scratch[7].r;
483  scratch[13].i = scratch[3].i - scratch[7].i;
484  scratch[14].r = scratch[2].r - scratch[6].r;
485  scratch[14].i = scratch[2].i - scratch[6].i;
486  scratch[15].r = scratch[3].r + scratch[7].r;
487  scratch[15].i = scratch[3].i + scratch[7].i;
488 
489  // Combine these combined components (for the full radix-8 butterfly)
490  scratch_out[0].r = scratch[8].r + scratch[12].r;
491  scratch_out[0].i = scratch[8].i + scratch[12].i;
492  scratch_out[1].r = scratch[9].r + scratch[13].r;
493  scratch_out[1].i = scratch[9].i + scratch[13].i;
494  scratch_out[2].r = scratch[10].r - scratch[14].i;
495  scratch_out[2].i = scratch[10].i + scratch[14].r;
496  scratch_out[3].r = scratch[11].r - scratch[15].i;
497  scratch_out[3].i = scratch[11].i + scratch[15].r;
498  scratch_out[4].r = scratch[8].r - scratch[12].r;
499  scratch_out[4].i = scratch[8].i - scratch[12].i;
500  scratch_out[5].r = scratch[9].r - scratch[13].r;
501  scratch_out[5].i = scratch[9].i - scratch[13].i;
502  scratch_out[6].r = scratch[10].r + scratch[14].i;
503  scratch_out[6].i = scratch[10].i - scratch[14].r;
504  scratch_out[7].r = scratch[11].r + scratch[15].i;
505  scratch_out[7].i = scratch[11].i - scratch[15].r;
506 
507  // Store the results
508  dst[0] = scratch_out[0];
509  dst[1] = scratch_out[1];
510  dst[2] = scratch_out[2];
511  dst[3] = scratch_out[3];
512  dst[4] = scratch_out[4];
513  dst[5] = scratch_out[5];
514  dst[6] = scratch_out[6];
515  dst[7] = scratch_out[7];
516 
517  src++;
518  } // f_count
519 
520  // Update variables for the next stages
521  step = fstride << 1;
522  stage_count--;
523  fstride /= 4;
524 
525  if (stage_count == 0)
526  {
527  dst = out;
528  for (f_count = 0; f_count < 8; f_count++)
529  {
530  dst[f_count].r *= one_by_nfft;
531  dst[f_count].i *= one_by_nfft;
532  }
533  }
534  }
535  else if (first_radix == 4) // nfft is of form 2^{even} >= 4
536  {
537  for (f_count = fstride; f_count; f_count--)
538  {
539  // Load the four input values for a radix-4 butterfly
540  scratch_in[0] = src[0];
541  scratch_in[1] = src[fstride * 1];
542  scratch_in[2] = src[fstride * 2];
543  scratch_in[3] = src[fstride * 3];
544 
545  // Prepare the first set of sums for the butterfly calculations
546  scratch[0].r = scratch_in[0].r + scratch_in[2].r;
547  scratch[0].i = scratch_in[0].i + scratch_in[2].i;
548  scratch[1].r = scratch_in[0].r - scratch_in[2].r;
549  scratch[1].i = scratch_in[0].i - scratch_in[2].i;
550 
551  // Prepare the second set of sums for the butterfly calculations
552  scratch[2].r = scratch_in[1].r + scratch_in[3].r;
553  scratch[2].i = scratch_in[1].i + scratch_in[3].i;
554  scratch[3].r = scratch_in[1].r - scratch_in[3].r;
555  scratch[3].i = scratch_in[1].i - scratch_in[3].i;
556 
557  // Combine these sums (for the full radix-4 butterfly)
558  scratch_out[0].r = scratch[0].r + scratch[2].r;
559  scratch_out[0].i = scratch[0].i + scratch[2].i;
560  scratch_out[1].r = scratch[1].r - scratch[3].i;
561  scratch_out[1].i = scratch[1].i + scratch[3].r;
562  scratch_out[2].r = scratch[0].r - scratch[2].r;
563  scratch_out[2].i = scratch[0].i - scratch[2].i;
564  scratch_out[3].r = scratch[1].r + scratch[3].i;
565  scratch_out[3].i = scratch[1].i - scratch[3].r;
566 
567  // Store the results
568  *dst++ = scratch_out[0];
569  *dst++ = scratch_out[1];
570  *dst++ = scratch_out[2];
571  *dst++ = scratch_out[3];
572 
573  src++;
574  } // f_count
575 
576  // Update variables for the next stages
577  step = fstride;
578  stage_count--;
579  fstride /= 4;
580 
581  if (stage_count == 0)
582  {
583  dst = out;
584  for (f_count = 0; f_count < 4; f_count++)
585  {
586  dst[f_count].r *= one_by_nfft;
587  dst[f_count].i *= one_by_nfft;
588  }
589  }
590  }
591  else if (first_radix == 2) // nfft = 2
592  {
593  dst[0].r = (src[0].r + src[1].r) * one_by_nfft;
594  dst[0].i = (src[0].i + src[1].i) * one_by_nfft;
595  dst[1].r = (src[0].r - src[1].r) * one_by_nfft;
596  dst[1].i = (src[0].i - src[1].i) * one_by_nfft;
597  return;
598  }
599  else // nfft = 1
600  {
601  dst[0] = src[0];
602  return;
603  }
604 
605  // The next stage should read the output of the first stage as input
606  in = out;
607  out = buffer;
608 
609  // Middle stages (after the first, excluding the last)
610  for (; stage_count > 1; stage_count--)
611  {
612  src = in;
613  for (f_count = 0; f_count < fstride; f_count++)
614  {
615  dst = &out[f_count * (mstride * 4)];
616  tw = twiddles;
617  for (m_count = mstride; m_count; m_count--)
618  {
619  // Load the three twiddles and four input values for a radix-4 butterfly
620  scratch_tw[0] = tw[0];
621  scratch_tw[1] = tw[mstride * 1];
622  scratch_tw[2] = tw[mstride * 2];
623  scratch_in[0] = src[0];
624  scratch_in[1] = src[step * 1];
625  scratch_in[2] = src[step * 2];
626  scratch_in[3] = src[step * 3];
627 
628  // Multiply input elements by their associated twiddles
629  scratch[0] = scratch_in[0];
630  scratch[1].r = scratch_in[1].r * scratch_tw[0].r + scratch_in[1].i * scratch_tw[0].i;
631  scratch[1].i = scratch_in[1].i * scratch_tw[0].r - scratch_in[1].r * scratch_tw[0].i;
632  scratch[2].r = scratch_in[2].r * scratch_tw[1].r + scratch_in[2].i * scratch_tw[1].i;
633  scratch[2].i = scratch_in[2].i * scratch_tw[1].r - scratch_in[2].r * scratch_tw[1].i;
634  scratch[3].r = scratch_in[3].r * scratch_tw[2].r + scratch_in[3].i * scratch_tw[2].i;
635  scratch[3].i = scratch_in[3].i * scratch_tw[2].r - scratch_in[3].r * scratch_tw[2].i;
636 
637  // Prepare the first set of sums for the butterfly calculations
638  scratch[4].r = scratch[0].r + scratch[2].r;
639  scratch[4].i = scratch[0].i + scratch[2].i;
640  scratch[5].r = scratch[0].r - scratch[2].r;
641  scratch[5].i = scratch[0].i - scratch[2].i;
642 
643  // Prepare the second set of sums for the butterfly calculations
644  scratch[6].r = scratch[1].r + scratch[3].r;
645  scratch[6].i = scratch[1].i + scratch[3].i;
646  scratch[7].r = scratch[1].r - scratch[3].r;
647  scratch[7].i = scratch[1].i - scratch[3].i;
648 
649  // Combine these sums (for the full radix-4 butterfly)
650  scratch_out[0].r = scratch[4].r + scratch[6].r;
651  scratch_out[0].i = scratch[4].i + scratch[6].i;
652  scratch_out[1].r = scratch[5].r - scratch[7].i;
653  scratch_out[1].i = scratch[5].i + scratch[7].r;
654  scratch_out[2].r = scratch[4].r - scratch[6].r;
655  scratch_out[2].i = scratch[4].i - scratch[6].i;
656  scratch_out[3].r = scratch[5].r + scratch[7].i;
657  scratch_out[3].i = scratch[5].i - scratch[7].r;
658 
659  // Store the results
660  dst[0] = scratch_out[0];
661  dst[mstride * 1] = scratch_out[1];
662  dst[mstride * 2] = scratch_out[2];
663  dst[mstride * 3] = scratch_out[3];
664 
665  tw++;
666  src++;
667  dst++;
668  } // m_count
669  } // f_count
670 
671  // Update variables for the next stages
672  twiddles += mstride * 3;
673  mstride *= 4;
674  fstride /= 4;
675 
676  // Swap the input and output buffers for the next stage
677  tmp = in;
678  in = out;
679  out = tmp;
680  } // stage_count
681 
682  // The last stage
683  if (stage_count)
684  {
685  src = in;
686 
687  // Always write to the final output buffer (if necessary, we can calculate this
688  // in-place as the final stage reads and writes at the same offsets)
689  dst = out_final;
690 
691  for (f_count = 0; f_count < fstride; f_count++)
692  {
693  tw = twiddles;
694  for (m_count = mstride; m_count; m_count--)
695  {
696  // Load the three twiddles and four input values for a radix-4 butterfly
697  scratch_tw[0] = tw[0];
698  scratch_tw[1] = tw[mstride * 1];
699  scratch_tw[2] = tw[mstride * 2];
700  scratch_in[0] = src[0];
701  scratch_in[1] = src[step * 1];
702  scratch_in[2] = src[step * 2];
703  scratch_in[3] = src[step * 3];
704 
705  // Multiply input elements by their associated twiddles
706  scratch[0] = scratch_in[0];
707  scratch[1].r = scratch_in[1].r * scratch_tw[0].r + scratch_in[1].i * scratch_tw[0].i;
708  scratch[1].i = scratch_in[1].i * scratch_tw[0].r - scratch_in[1].r * scratch_tw[0].i;
709  scratch[2].r = scratch_in[2].r * scratch_tw[1].r + scratch_in[2].i * scratch_tw[1].i;
710  scratch[2].i = scratch_in[2].i * scratch_tw[1].r - scratch_in[2].r * scratch_tw[1].i;
711  scratch[3].r = scratch_in[3].r * scratch_tw[2].r + scratch_in[3].i * scratch_tw[2].i;
712  scratch[3].i = scratch_in[3].i * scratch_tw[2].r - scratch_in[3].r * scratch_tw[2].i;
713 
714  // Prepare the first set of sums for the butterfly calculations
715  scratch[4].r = scratch[0].r + scratch[2].r;
716  scratch[4].i = scratch[0].i + scratch[2].i;
717  scratch[5].r = scratch[0].r - scratch[2].r;
718  scratch[5].i = scratch[0].i - scratch[2].i;
719 
720  // Prepare the second set of sums for the butterfly calculations
721  scratch[6].r = scratch[1].r + scratch[3].r;
722  scratch[6].i = scratch[1].i + scratch[3].i;
723  scratch[7].r = scratch[1].r - scratch[3].r;
724  scratch[7].i = scratch[1].i - scratch[3].i;
725 
726  // Combine these sums (for the full radix-4 butterfly) and multiply by
727  // (1 / nfft).
728  scratch_out[0].r = (scratch[4].r + scratch[6].r) * one_by_nfft;
729  scratch_out[0].i = (scratch[4].i + scratch[6].i) * one_by_nfft;
730  scratch_out[1].r = (scratch[5].r - scratch[7].i) * one_by_nfft;
731  scratch_out[1].i = (scratch[5].i + scratch[7].r) * one_by_nfft;
732  scratch_out[2].r = (scratch[4].r - scratch[6].r) * one_by_nfft;
733  scratch_out[2].i = (scratch[4].i - scratch[6].i) * one_by_nfft;
734  scratch_out[3].r = (scratch[5].r + scratch[7].i) * one_by_nfft;
735  scratch_out[3].i = (scratch[5].i - scratch[7].r) * one_by_nfft;
736 
737  // Store the results
738  dst[0] = scratch_out[0];
739  dst[step * 1] = scratch_out[1];
740  dst[step * 2] = scratch_out[2];
741  dst[step * 3] = scratch_out[3];
742 
743  tw++;
744  src++;
745  dst++;
746  } // m_count
747  } // f_count
748  } // last stage
749 }
750 
751 static void ne10_fft_split_r2c_1d_float32 (ne10_fft_cpx_float32_t *dst,
752  const ne10_fft_cpx_float32_t *src,
753  ne10_fft_cpx_float32_t *twiddles,
754  ne10_int32_t ncfft)
755 {
756  ne10_int32_t k;
757  ne10_fft_cpx_float32_t fpnk, fpk, f1k, f2k, tw, tdc;
758 
759  tdc.r = src[0].r;
760  tdc.i = src[0].i;
761 
762  dst[0].r = tdc.r + tdc.i;
763  dst[ncfft].r = tdc.r - tdc.i;
764  dst[ncfft].i = dst[0].i = 0;
765 
766  for (k = 1; k <= ncfft / 2 ; ++k)
767  {
768  fpk = src[k];
769  fpnk.r = src[ncfft - k].r;
770  fpnk.i = - src[ncfft - k].i;
771 
772  f1k.r = fpk.r + fpnk.r;
773  f1k.i = fpk.i + fpnk.i;
774 
775  f2k.r = fpk.r - fpnk.r;
776  f2k.i = fpk.i - fpnk.i;
777 
778  tw.r = f2k.r * (twiddles[k - 1]).r - f2k.i * (twiddles[k - 1]).i;
779  tw.i = f2k.r * (twiddles[k - 1]).i + f2k.i * (twiddles[k - 1]).r;
780 
781  dst[k].r = (f1k.r + tw.r) * 0.5f;
782  dst[k].i = (f1k.i + tw.i) * 0.5f;
783  dst[ncfft - k].r = (f1k.r - tw.r) * 0.5f;
784  dst[ncfft - k].i = (tw.i - f1k.i) * 0.5f;
785  }
786 }
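/*
 * A sketch of the identity applied above (an editorial summary using the standard
 * real-FFT post-processing argument): if Z[k] is the ncfft-point complex FFT of the
 * packed sequence z[n] = x[2n] + i*x[2n+1], then with
 *
 *   f1k = Z[k] + conj(Z[ncfft - k])   (twice the spectrum of the even samples)
 *   f2k = Z[k] - conj(Z[ncfft - k])   (2i times the spectrum of the odd samples)
 *
 * and the "super twiddle" w[k] = -i * exp(-i * pi * k / ncfft) generated in
 * ne10_fft_alloc_r2c_float32, the 2*ncfft-point real FFT is recovered as
 *
 *   X[k]         = (f1k + w[k] * f2k) / 2
 *   X[ncfft - k] = conj((f1k - w[k] * f2k) / 2)
 *
 * which is exactly what the loop computes.
 */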
787 
788 static void ne10_fft_split_c2r_1d_float32 (ne10_fft_cpx_float32_t *dst,
789  const ne10_fft_cpx_float32_t *src,
790  ne10_fft_cpx_float32_t *twiddles,
791  ne10_int32_t ncfft)
792 {
793 
794  ne10_int32_t k;
795  ne10_fft_cpx_float32_t fk, fnkc, fek, fok, tmp;
796 
797 
798  dst[0].r = (src[0].r + src[ncfft].r) * 0.5f;
799  dst[0].i = (src[0].r - src[ncfft].r) * 0.5f;
800 
801  for (k = 1; k <= ncfft / 2; k++)
802  {
803  fk = src[k];
804  fnkc.r = src[ncfft - k].r;
805  fnkc.i = -src[ncfft - k].i;
806 
807  fek.r = fk.r + fnkc.r;
808  fek.i = fk.i + fnkc.i;
809 
810  tmp.r = fk.r - fnkc.r;
811  tmp.i = fk.i - fnkc.i;
812 
813  fok.r = tmp.r * twiddles[k - 1].r + tmp.i * twiddles[k - 1].i;
814  fok.i = tmp.i * twiddles[k - 1].r - tmp.r * twiddles[k - 1].i;
815 
816  dst[k].r = (fek.r + fok.r) * 0.5f;
817  dst[k].i = (fek.i + fok.i) * 0.5f;
818 
819  dst[ncfft - k].r = (fek.r - fok.r) * 0.5f;
820  dst[ncfft - k].i = (fok.i - fek.i) * 0.5f;
821  }
822 }
823 
828 ne10_fft_cfg_float32_t ne10_fft_alloc_c2c_float32_c (ne10_int32_t nfft)
829 {
830  ne10_fft_cfg_float32_t st = NULL;
831  ne10_uint32_t memneeded = sizeof (ne10_fft_state_float32_t)
832  + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors */
833  + sizeof (ne10_fft_cpx_float32_t) * nfft /* twiddles */
834  + sizeof (ne10_fft_cpx_float32_t) * nfft /* buffer */
835  + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment */
836 
837  st = (ne10_fft_cfg_float32_t) NE10_MALLOC (memneeded);
838 
839  if (st == NULL)
840  {
841  return st;
842  }
843 
844  // Only backward FFT is scaled by default.
845  st->is_forward_scaled = 0;
846  st->is_backward_scaled = 1;
847 
848  uintptr_t address = (uintptr_t) st + sizeof (ne10_fft_state_float32_t);
849  NE10_BYTE_ALIGNMENT (address, NE10_FFT_BYTE_ALIGNMENT);
850  st->factors = (ne10_int32_t*) address;
851  st->twiddles = (ne10_fft_cpx_float32_t*) (st->factors + (NE10_MAXFACTORS * 2));
852  st->buffer = st->twiddles + nfft;
853  st->nfft = nfft;
854 
855  ne10_int32_t result;
856  result = ne10_factor (nfft, st->factors, NE10_FACTOR_EIGHT_FIRST_STAGE);
857  if (result == NE10_ERR)
858  {
859  NE10_FREE (st);
860  return NULL;
861  }
862 
863  // Disable radix 8 for generic FFTs (it doesn't work properly)
864  {
865  ne10_int32_t stage_count = st->factors[0];
866  ne10_int32_t algorithm_flag = st->factors[2 * (stage_count + 1)];
867 
868  if (algorithm_flag == NE10_FFT_ALG_ANY)
869  {
870  result = ne10_factor (st->nfft, st->factors, NE10_FACTOR_DEFAULT);
871  if (result == NE10_ERR)
872  {
873  NE10_FREE (st);
874  return NULL;
875  }
876  }
877  }
878 
879  ne10_fft_generate_twiddles_float32 (st->twiddles, st->factors, nfft);
880 
881  return st;
882 }
883 
888 void ne10_fft_c2c_1d_float32_c (ne10_fft_cpx_float32_t *fout,
889  ne10_fft_cpx_float32_t *fin,
890  ne10_fft_cfg_float32_t cfg,
891  ne10_int32_t inverse_fft)
892 {
893  ne10_int32_t stage_count = cfg->factors[0];
894  ne10_int32_t algorithm_flag = cfg->factors[2 * (stage_count + 1)];
895 
896  assert ((algorithm_flag == NE10_FFT_ALG_DEFAULT)
897  || (algorithm_flag == NE10_FFT_ALG_ANY));
898 
899  switch (algorithm_flag)
900  {
901  case NE10_FFT_ALG_DEFAULT:
902  if (inverse_fft)
903  {
904  ne10_mixed_radix_butterfly_inverse_float32_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer);
905  }
906  else
907  {
908  ne10_mixed_radix_butterfly_float32_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer);
909  }
910  break;
911  case NE10_FFT_ALG_ANY:
912  if (inverse_fft)
913  {
914  ne10_mixed_radix_generic_butterfly_inverse_float32_c (fout, fin,
915  cfg->factors, cfg->twiddles, cfg->buffer, cfg->is_backward_scaled);
916  }
917  else
918  {
919  ne10_mixed_radix_generic_butterfly_float32_c (fout, fin,
920  cfg->factors, cfg->twiddles, cfg->buffer, cfg->is_forward_scaled);
921  }
922  break;
923  }
924 }
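/*
 * A minimal usage sketch of the C2C interface above (illustrative only; it assumes a
 * power-of-two nfft and that the caller fills 'in' with complex samples):
 *
 *   ne10_fft_cpx_float32_t in[16], out[16];
 *   ne10_fft_cfg_float32_t cfg = ne10_fft_alloc_c2c_float32_c (16);
 *   if (cfg != NULL)
 *   {
 *       ne10_fft_c2c_1d_float32_c (out, in, cfg, 0); // forward FFT
 *       ne10_fft_c2c_1d_float32_c (in, out, cfg, 1); // inverse FFT, scaled by 1/nfft
 *       NE10_FREE (cfg);
 *   }
 */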
925 
926 // For NE10_UNROLL_LEVEL > 0, please refer to NE10_rfft_float.c
927 #if (NE10_UNROLL_LEVEL == 0)
928 
943 ne10_fft_r2c_cfg_float32_t ne10_fft_alloc_r2c_float32 (ne10_int32_t nfft)
944 {
945  ne10_fft_r2c_cfg_float32_t st = NULL;
946  ne10_int32_t ncfft = nfft >> 1;
947 
948  ne10_uint32_t memneeded = sizeof (ne10_fft_r2c_state_float32_t)
949  + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors */
950  + sizeof (ne10_fft_cpx_float32_t) * ncfft /* twiddle */
951  + sizeof (ne10_fft_cpx_float32_t) * (ncfft / 2) /* super twiddles */
952  + sizeof (ne10_fft_cpx_float32_t) * nfft /* buffer */
953  + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment */
954 
955  st = (ne10_fft_r2c_cfg_float32_t) NE10_MALLOC (memneeded);
956 
957  if (st)
958  {
959  uintptr_t address = (uintptr_t) st + sizeof (ne10_fft_r2c_state_float32_t);
960  NE10_BYTE_ALIGNMENT (address, NE10_FFT_BYTE_ALIGNMENT);
961  st->factors = (ne10_int32_t*) address;
962  st->twiddles = (ne10_fft_cpx_float32_t*) (st->factors + (NE10_MAXFACTORS * 2));
963  st->super_twiddles = st->twiddles + ncfft;
964  st->buffer = st->super_twiddles + (ncfft / 2);
965  st->ncfft = ncfft;
966 
967  ne10_int32_t result = ne10_factor (ncfft, st->factors, NE10_FACTOR_DEFAULT);
968  if (result == NE10_ERR)
969  {
970  NE10_FREE (st);
971  return NULL;
972  }
973 
974  ne10_int32_t j, k;
975  ne10_int32_t *factors = st->factors;
976  ne10_fft_cpx_float32_t *twiddles = st->twiddles;
977  ne10_int32_t stage_count = factors[0];
978  ne10_int32_t fstride = factors[1];
979  ne10_int32_t mstride;
980  ne10_int32_t cur_radix;
981  ne10_float32_t phase;
982  const ne10_float32_t pi = NE10_PI;
983 
984  // Don't generate any twiddles for the first stage
985  stage_count --;
986 
987  // Generate twiddles for the other stages
988  for (; stage_count > 0; stage_count --)
989  {
990  cur_radix = factors[2 * stage_count];
991  fstride /= cur_radix;
992  mstride = factors[2 * stage_count + 1];
993  for (j = 0; j < mstride; j++)
994  {
995  for (k = 1; k < cur_radix; k++) // phase = 1 when k = 0
996  {
997  phase = -2 * pi * fstride * k * j / ncfft;
998  twiddles[mstride * (k - 1) + j].r = (ne10_float32_t) cos (phase);
999  twiddles[mstride * (k - 1) + j].i = (ne10_float32_t) sin (phase);
1000  }
1001  }
1002  twiddles += mstride * (cur_radix - 1);
1003  }
1004 
1005  twiddles = st->super_twiddles;
1006  for (j = 0; j < ncfft / 2; j++)
1007  {
1008  phase = -pi * ( (ne10_float32_t) (j + 1) / ncfft + 0.5f);
1009  twiddles->r = (ne10_float32_t) cos (phase);
1010  twiddles->i = (ne10_float32_t) sin (phase);
1011  twiddles++;
1012  }
1013  }
1014 
1015  return st;
1016 }
1017 
1022 void ne10_fft_r2c_1d_float32_c (ne10_fft_cpx_float32_t *fout,
1023  ne10_float32_t *fin,
1024  ne10_fft_r2c_cfg_float32_t cfg)
1025 {
1026  ne10_fft_cpx_float32_t * tmpbuf = cfg->buffer;
1027 
1028  ne10_mixed_radix_butterfly_float32_c (tmpbuf, (ne10_fft_cpx_float32_t*) fin, cfg->factors, cfg->twiddles, fout);
1029  ne10_fft_split_r2c_1d_float32 (fout, tmpbuf, cfg->super_twiddles, cfg->ncfft);
1030 }
1031 
1036 void ne10_fft_c2r_1d_float32_c (ne10_float32_t *fout,
1037  ne10_fft_cpx_float32_t *fin,
1038  ne10_fft_r2c_cfg_float32_t cfg)
1039 {
1040  ne10_fft_cpx_float32_t * tmpbuf1 = cfg->buffer;
1041  ne10_fft_cpx_float32_t * tmpbuf2 = cfg->buffer + cfg->ncfft;
1042 
1043  ne10_fft_split_c2r_1d_float32 (tmpbuf1, fin, cfg->super_twiddles, cfg->ncfft);
1044  ne10_mixed_radix_butterfly_inverse_float32_c ( (ne10_fft_cpx_float32_t*) fout, tmpbuf1, cfg->factors, cfg->twiddles, tmpbuf2);
1045 }
1046 
1047 #endif // NE10_UNROLL_LEVEL
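/*
 * A minimal usage sketch of the R2C/C2R pair above (illustrative only; it assumes nfft
 * is even and that ncfft = nfft/2 is a supported size, with 'time_buf' holding nfft real
 * samples). The forward transform produces ncfft + 1 = nfft/2 + 1 complex bins, from DC
 * up to Nyquist:
 *
 *   ne10_float32_t time_buf[16];
 *   ne10_fft_cpx_float32_t freq_buf[16 / 2 + 1];
 *   ne10_fft_r2c_cfg_float32_t cfg = ne10_fft_alloc_r2c_float32 (16);
 *   if (cfg != NULL)
 *   {
 *       ne10_fft_r2c_1d_float32_c (freq_buf, time_buf, cfg); // real-to-complex FFT
 *       ne10_fft_c2r_1d_float32_c (time_buf, freq_buf, cfg); // complex-to-real inverse
 *       NE10_FREE (cfg);
 *   }
 */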