Project Ne10
An open, optimized software library for the ARM architecture.
NE10_fft_int16.c
1 /*
2  * Copyright 2013-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /* license of Kiss FFT */
29 /*
30 Copyright (c) 2003-2010, Mark Borgerding
31 
32 All rights reserved.
33 
34 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
35 
36  * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
37  * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
38  * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
39 
40 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 */
42 
43 /*
44  * NE10 Library : dsp/NE10_fft_int16.c
45  */
46 
47 #include "NE10_types.h"
48 #include "NE10_macros.h"
49 #include "NE10_fft.h"
50 
51 
52 static void ne10_mixed_radix_butterfly_int16_c (ne10_fft_cpx_int16_t * Fout,
53  ne10_fft_cpx_int16_t * Fin,
54  ne10_int32_t * factors,
55  ne10_fft_cpx_int16_t * twiddles,
56  ne10_fft_cpx_int16_t * buffer,
57  ne10_int32_t scaled_flag)
58 {
59  ne10_int32_t fstride, mstride, N;
60  ne10_int32_t f_count, m_count;
61  ne10_int32_t stage_count;
62 
63  ne10_fft_cpx_int16_t scratch_in[8];
64  ne10_fft_cpx_int16_t scratch_out[8];
65  ne10_fft_cpx_int16_t scratch[16];
66  ne10_fft_cpx_int16_t scratch_tw[6];
67 
68  ne10_fft_cpx_int16_t *Fin1, *Fin2, *Fout1, *Fout2;
69  ne10_fft_cpx_int16_t *Fout_ls = Fout;
70  ne10_fft_cpx_int16_t *Ftmp;
71  ne10_fft_cpx_int16_t *tw, *tw1, *tw2;
72  const ne10_int32_t TW_81 = 23169;
73  const ne10_int32_t TW_81N = -23169;
74 
75 
76 
77  // init fstride, mstride, N, tw
78  stage_count = factors[0];
79  fstride = factors[1];
80  mstride = factors[ (stage_count << 1) - 1 ];
81  N = factors[ stage_count << 1 ]; // radix
82  tw = twiddles;
83 
84  // the first stage
85  Fin1 = Fin;
86  Fout1 = Fout;
87  if (N == 8) // length of FFT is 2^n (n is odd)
88  {
89  // radix 8
90  N = fstride << 1; // 1/4 of length of FFT
91  tw = twiddles;
92 
93  Fin1 = Fin;
94  for (f_count = 0; f_count < fstride; f_count ++)
95  {
96  Fout1 = & Fout[ f_count * 8 ];
97  // load
98  if (scaled_flag == 1)
99  {
100  NE10_F2I16_FIXDIV (Fin1[0], 8);
101  NE10_F2I16_FIXDIV (Fin1[fstride * 4], 8);
102  NE10_F2I16_FIXDIV (Fin1[fstride], 8);
103  NE10_F2I16_FIXDIV (Fin1[fstride * 5], 8);
104  NE10_F2I16_FIXDIV (Fin1[fstride * 2], 8);
105  NE10_F2I16_FIXDIV (Fin1[fstride * 6], 8);
106  NE10_F2I16_FIXDIV (Fin1[fstride * 3], 8);
107  NE10_F2I16_FIXDIV (Fin1[fstride * 7], 8);
108  }
109  scratch_in[0].r = Fin1[0].r + Fin1[fstride * 4].r;
110  scratch_in[0].i = Fin1[0].i + Fin1[fstride * 4].i;
111  scratch_in[1].r = Fin1[0].r - Fin1[fstride * 4].r;
112  scratch_in[1].i = Fin1[0].i - Fin1[fstride * 4].i;
113  scratch_in[2].r = Fin1[fstride].r + Fin1[fstride * 5].r;
114  scratch_in[2].i = Fin1[fstride].i + Fin1[fstride * 5].i;
115  scratch_in[3].r = Fin1[fstride].r - Fin1[fstride * 5].r;
116  scratch_in[3].i = Fin1[fstride].i - Fin1[fstride * 5].i;
117  scratch_in[4].r = Fin1[fstride * 2].r + Fin1[fstride * 6].r;
118  scratch_in[4].i = Fin1[fstride * 2].i + Fin1[fstride * 6].i;
119  scratch_in[5].r = Fin1[fstride * 2].r - Fin1[fstride * 6].r;
120  scratch_in[5].i = Fin1[fstride * 2].i - Fin1[fstride * 6].i;
121  scratch_in[6].r = Fin1[fstride * 3].r + Fin1[fstride * 7].r;
122  scratch_in[6].i = Fin1[fstride * 3].i + Fin1[fstride * 7].i;
123  scratch_in[7].r = Fin1[fstride * 3].r - Fin1[fstride * 7].r;
124  scratch_in[7].i = Fin1[fstride * 3].i - Fin1[fstride * 7].i;
125 
126  // radix 4 butterfly without twiddles
127  scratch[0] = scratch_in[0];
128  scratch[1] = scratch_in[1];
129 
130  scratch[2] = scratch_in[2];
131  scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[3].r + scratch_in[3].i) * TW_81) >> NE10_F2I16_SHIFT);
132  scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[3].i - scratch_in[3].r) * TW_81) >> NE10_F2I16_SHIFT);
133 
134  scratch[4] = scratch_in[4];
135  scratch[5].r = scratch_in[5].i;
136  scratch[5].i = -scratch_in[5].r;
137 
138  scratch[6].r = scratch_in[6].r;
139  scratch[6].i = scratch_in[6].i;
140  scratch[7].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[7].r - scratch_in[7].i) * TW_81N) >> NE10_F2I16_SHIFT);
141  scratch[7].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[7].i + scratch_in[7].r) * TW_81N) >> NE10_F2I16_SHIFT);
142 
143  // radix 2 butterfly
144  scratch[8].r = scratch[0].r + scratch[4].r;
145  scratch[8].i = scratch[0].i + scratch[4].i;
146  scratch[9].r = scratch[1].r + scratch[5].r;
147  scratch[9].i = scratch[1].i + scratch[5].i;
148 
149  scratch[10].r = scratch[0].r - scratch[4].r;
150  scratch[10].i = scratch[0].i - scratch[4].i;
151  scratch[11].r = scratch[1].r - scratch[5].r;
152  scratch[11].i = scratch[1].i - scratch[5].i;
153 
154  // radix 2 butterfly
155  scratch[12].r = scratch[2].r + scratch[6].r;
156  scratch[12].i = scratch[2].i + scratch[6].i;
157  scratch[13].r = scratch[3].r + scratch[7].r;
158  scratch[13].i = scratch[3].i + scratch[7].i;
159 
160  scratch[14].r = scratch[2].r - scratch[6].r;
161  scratch[14].i = scratch[2].i - scratch[6].i;
162  scratch[15].r = scratch[3].r - scratch[7].r;
163  scratch[15].i = scratch[3].i - scratch[7].i;
164 
165  // third result
166  scratch_out[4].r = scratch[8].r - scratch[12].r;
167  scratch_out[4].i = scratch[8].i - scratch[12].i;
168  scratch_out[5].r = scratch[9].r - scratch[13].r;
169  scratch_out[5].i = scratch[9].i - scratch[13].i;
170 
171  // first result
172  scratch_out[0].r = scratch[8].r + scratch[12].r;
173  scratch_out[0].i = scratch[8].i + scratch[12].i;
174  scratch_out[1].r = scratch[9].r + scratch[13].r;
175  scratch_out[1].i = scratch[9].i + scratch[13].i;
176 
177  // second result
178  scratch_out[2].r = scratch[10].r + scratch[14].i;
179  scratch_out[2].i = scratch[10].i - scratch[14].r;
180  scratch_out[3].r = scratch[11].r + scratch[15].i;
181  scratch_out[3].i = scratch[11].i - scratch[15].r;
182 
183  // fourth result
184  scratch_out[6].r = scratch[10].r - scratch[14].i;
185  scratch_out[6].i = scratch[10].i + scratch[14].r;
186  scratch_out[7].r = scratch[11].r - scratch[15].i;
187  scratch_out[7].i = scratch[11].i + scratch[15].r;
188 
189  // store
190  Fout1[0] = scratch_out[0];
191  Fout1[1] = scratch_out[1];
192  Fout1[2] = scratch_out[2];
193  Fout1[3] = scratch_out[3];
194  Fout1[4] = scratch_out[4];
195  Fout1[5] = scratch_out[5];
196  Fout1[6] = scratch_out[6];
197  Fout1[7] = scratch_out[7];
198 
199  Fin1 += 1;
200  } // f_count
201  fstride >>= 2;
202  stage_count--;
203 
204  // swap
205  Ftmp = buffer;
206  buffer = Fout;
207  Fout = Ftmp;
208  }
209  else if (N == 4) // length of FFT is 2^n (n is even) >= 4
210  {
211  //fstride is nfft>>2
212  for (f_count = fstride; f_count ; f_count --)
213  {
214  // load
215  scratch_in[0] = *Fin1;
216  Fin2 = Fin1 + fstride;
217  scratch_in[1] = *Fin2;
218  Fin2 = Fin2 + fstride;
219  scratch_in[2] = *Fin2;
220  Fin2 = Fin2 + fstride;
221  scratch_in[3] = *Fin2;
222 
223  // radix 4 butterfly without twiddles
224  if (scaled_flag == 1)
225  {
226  NE10_F2I16_FIXDIV (scratch_in[0], 4);
227  NE10_F2I16_FIXDIV (scratch_in[1], 4);
228  NE10_F2I16_FIXDIV (scratch_in[2], 4);
229  NE10_F2I16_FIXDIV (scratch_in[3], 4);
230  }
231 
232  // radix 2 butterfly
233  scratch[0].r = scratch_in[0].r + scratch_in[2].r;
234  scratch[0].i = scratch_in[0].i + scratch_in[2].i;
235 
236  scratch[1].r = scratch_in[0].r - scratch_in[2].r;
237  scratch[1].i = scratch_in[0].i - scratch_in[2].i;
238 
239  // radix 2 butterfly
240  scratch[2].r = scratch_in[1].r + scratch_in[3].r;
241  scratch[2].i = scratch_in[1].i + scratch_in[3].i;
242 
243  scratch[3].r = scratch_in[1].r - scratch_in[3].r;
244  scratch[3].i = scratch_in[1].i - scratch_in[3].i;
245 
246  // third result
247  scratch_out[2].r = scratch[0].r - scratch[2].r;
248  scratch_out[2].i = scratch[0].i - scratch[2].i;
249 
250  // first result
251  scratch_out[0].r = scratch[0].r + scratch[2].r;
252  scratch_out[0].i = scratch[0].i + scratch[2].i;
253 
254  // second result
255  scratch_out[1].r = scratch[1].r + scratch[3].i;
256  scratch_out[1].i = scratch[1].i - scratch[3].r;
257 
258  // fourth result
259  scratch_out[3].r = scratch[1].r - scratch[3].i;
260  scratch_out[3].i = scratch[1].i + scratch[3].r;
261 
262  // store
263  * Fout1 ++ = scratch_out[0];
264  * Fout1 ++ = scratch_out[1];
265  * Fout1 ++ = scratch_out[2];
266  * Fout1 ++ = scratch_out[3];
267 
268  Fin1++;
269  } // f_count
270 
271  N = fstride; // 1/4 of length of FFT
272 
273  // swap
274  Ftmp = buffer;
275  buffer = Fout;
276  Fout = Ftmp;
277 
278  // update address for other stages
279  stage_count--;
280  fstride >>= 2;
281  // end of first stage
282  }
283  else if (N == 2) // Length of FFT is 2
284  {
285  scratch_in[0] = Fin1[0];
286  scratch_in[1] = Fin1[1];
287 
288  if (scaled_flag == 1)
289  {
290  NE10_F2I16_FIXDIV (scratch_in[0], 2);
291  NE10_F2I16_FIXDIV (scratch_in[1], 2);
292  }
293 
294  Fout1[0].r = scratch_in[0].r + scratch_in[1].r;
295  Fout1[0].i = scratch_in[0].i + scratch_in[1].i;
296  Fout1[1].r = scratch_in[0].r - scratch_in[1].r;
297  Fout1[1].i = scratch_in[0].i - scratch_in[1].i;
298  return;
299  }
300  else // Length of FFT is 1
301  {
302  Fout1[0] = Fin1[0];
303  return;
304  }
305 
306 
307  // others but the last one
308  for (; stage_count > 1 ; stage_count--)
309  {
310  Fin1 = buffer;
311  for (f_count = 0; f_count < fstride; f_count ++)
312  {
313  Fout1 = & Fout[ f_count * mstride << 2 ];
314  tw1 = tw;
315  for (m_count = mstride; m_count ; m_count --)
316  {
317  // load
318  scratch_tw[0] = *tw1;
319  tw2 = tw1 + mstride;
320  scratch_tw[1] = *tw2;
321  tw2 += mstride;
322  scratch_tw[2] = *tw2;
323  scratch_in[0] = * Fin1;
324  Fin2 = Fin1 + N;
325  scratch_in[1] = * Fin2;
326  Fin2 += N;
327  scratch_in[2] = * Fin2;
328  Fin2 += N;
329  scratch_in[3] = * Fin2;
330  if (scaled_flag == 1)
331  {
332  NE10_F2I16_FIXDIV (scratch_in[0], 4);
333  NE10_F2I16_FIXDIV (scratch_in[1], 4);
334  NE10_F2I16_FIXDIV (scratch_in[2], 4);
335  NE10_F2I16_FIXDIV (scratch_in[3], 4);
336  }
337 
338  // radix 4 butterfly with twiddles
339 
340  scratch[0] = scratch_in[0];
341  scratch[1].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].r
342  - (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
343  scratch[1].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].r
344  + (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
345 
346  scratch[2].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].r
347  - (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
348  scratch[2].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].r
349  + (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
350 
351  scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].r
352  - (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
353  scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].r
354  + (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
355 
356  // radix 2 butterfly
357  scratch[4].r = scratch[0].r + scratch[2].r;
358  scratch[4].i = scratch[0].i + scratch[2].i;
359 
360  scratch[5].r = scratch[0].r - scratch[2].r;
361  scratch[5].i = scratch[0].i - scratch[2].i;
362 
363  // radix 2 butterfly
364  scratch[6].r = scratch[1].r + scratch[3].r;
365  scratch[6].i = scratch[1].i + scratch[3].i;
366 
367  scratch[7].r = scratch[1].r - scratch[3].r;
368  scratch[7].i = scratch[1].i - scratch[3].i;
369 
370  // third result
371  scratch_out[2].r = scratch[4].r - scratch[6].r;
372  scratch_out[2].i = scratch[4].i - scratch[6].i;
373 
374  // first result
375  scratch_out[0].r = scratch[4].r + scratch[6].r;
376  scratch_out[0].i = scratch[4].i + scratch[6].i;
377 
378  // second result
379  scratch_out[1].r = scratch[5].r + scratch[7].i;
380  scratch_out[1].i = scratch[5].i - scratch[7].r;
381 
382  // fourth result
383  scratch_out[3].r = scratch[5].r - scratch[7].i;
384  scratch_out[3].i = scratch[5].i + scratch[7].r;
385 
386  // store
387  *Fout1 = scratch_out[0];
388  Fout2 = Fout1 + mstride;
389  *Fout2 = scratch_out[1];
390  Fout2 += mstride;
391  *Fout2 = scratch_out[2];
392  Fout2 += mstride;
393  *Fout2 = scratch_out[3];
394 
395  tw1++;
396  Fin1 ++;
397  Fout1 ++;
398  } // m_count
399  } // f_count
400  tw += mstride * 3;
401  mstride <<= 2;
402  // swap
403  Ftmp = buffer;
404  buffer = Fout;
405  Fout = Ftmp;
406  fstride >>= 2;
407  } // stage_count
408 
409  // the last one
410  if (stage_count)
411  {
412  Fin1 = buffer;
413  // if stage count is even, output to the input array
414  Fout1 = Fout_ls;
415 
416  for (f_count = 0; f_count < fstride; f_count ++)
417  {
418  tw1 = tw;
419  for (m_count = mstride; m_count ; m_count --)
420  {
421  // load
422  scratch_tw[0] = *tw1;
423  tw2 = tw1 + mstride;
424  scratch_tw[1] = *tw2;
425  tw2 += mstride;
426  scratch_tw[2] = *tw2;
427  scratch_in[0] = * Fin1;
428  Fin2 = Fin1 + N;
429  scratch_in[1] = * Fin2;
430  Fin2 += N;
431  scratch_in[2] = * Fin2;
432  Fin2 += N;
433  scratch_in[3] = * Fin2;
434  if (scaled_flag == 1)
435  {
436  NE10_F2I16_FIXDIV (scratch_in[0], 4);
437  NE10_F2I16_FIXDIV (scratch_in[1], 4);
438  NE10_F2I16_FIXDIV (scratch_in[2], 4);
439  NE10_F2I16_FIXDIV (scratch_in[3], 4);
440  }
441 
442  // radix 4 butterfly with twiddles
443 
444  scratch[0] = scratch_in[0];
445  scratch[1].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].r
446  - (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
447  scratch[1].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].r
448  + (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
449 
450  scratch[2].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].r
451  - (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
452  scratch[2].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].r
453  + (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
454 
455  scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].r
456  - (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
457  scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].r
458  + (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
459 
460  // radix 2 butterfly
461  scratch[4].r = scratch[0].r + scratch[2].r;
462  scratch[4].i = scratch[0].i + scratch[2].i;
463 
464  scratch[5].r = scratch[0].r - scratch[2].r;
465  scratch[5].i = scratch[0].i - scratch[2].i;
466 
467  // radix 2 butterfly
468  scratch[6].r = scratch[1].r + scratch[3].r;
469  scratch[6].i = scratch[1].i + scratch[3].i;
470 
471  scratch[7].r = scratch[1].r - scratch[3].r;
472  scratch[7].i = scratch[1].i - scratch[3].i;
473 
474  // third result
475  scratch_out[2].r = scratch[4].r - scratch[6].r;
476  scratch_out[2].i = scratch[4].i - scratch[6].i;
477 
478  // first result
479  scratch_out[0].r = scratch[4].r + scratch[6].r;
480  scratch_out[0].i = scratch[4].i + scratch[6].i;
481 
482  // second result
483  scratch_out[1].r = scratch[5].r + scratch[7].i;
484  scratch_out[1].i = scratch[5].i - scratch[7].r;
485 
486  // fourth result
487  scratch_out[3].r = scratch[5].r - scratch[7].i;
488  scratch_out[3].i = scratch[5].i + scratch[7].r;
489 
490  // store
491  *Fout1 = scratch_out[0];
492  Fout2 = Fout1 + N;
493  *Fout2 = scratch_out[1];
494  Fout2 += N;
495  *Fout2 = scratch_out[2];
496  Fout2 += N;
497  *Fout2 = scratch_out[3];
498 
499  tw1 ++;
500  Fin1 ++;
501  Fout1 ++;
502  } // m_count
503  } // f_count
504  } // last stage
505 }
506 
507 static void ne10_mixed_radix_butterfly_inverse_int16_c (ne10_fft_cpx_int16_t * Fout,
508  ne10_fft_cpx_int16_t * Fin,
509  ne10_int32_t * factors,
510  ne10_fft_cpx_int16_t * twiddles,
511  ne10_fft_cpx_int16_t * buffer,
512  ne10_int32_t scaled_flag)
513 {
514  ne10_int32_t fstride, mstride, N;
515  ne10_int32_t f_count, m_count;
516  ne10_int32_t stage_count;
517 
518  ne10_fft_cpx_int16_t scratch_in[8];
519  ne10_fft_cpx_int16_t scratch_out[8];
520  ne10_fft_cpx_int16_t scratch[16];
521  ne10_fft_cpx_int16_t scratch_tw[6];
522 
523  ne10_fft_cpx_int16_t *Fin1, *Fin2, *Fout1, *Fout2;
524  ne10_fft_cpx_int16_t *Fout_ls = Fout;
525  ne10_fft_cpx_int16_t *Ftmp;
526  ne10_fft_cpx_int16_t *tw, *tw1, *tw2;
527  const ne10_int32_t TW_81 = 23169;
528  const ne10_int32_t TW_81N = -23169;
529 
530  // init fstride, mstride, N
531  stage_count = factors[0];
532  fstride = factors[1];
533  mstride = factors[ (stage_count << 1) - 1 ];
534  N = factors[ stage_count << 1 ]; // radix
535  tw = twiddles;
536 
537  // the first stage
538  Fin1 = Fin;
539  Fout1 = Fout;
540  if (N == 8) // length of FFT is 2^n (n is odd)
541  {
542  // radix 8
543  N = fstride << 1; // 1/4 of length of FFT
544  tw = twiddles;
545 
546  Fin1 = Fin;
547  for (f_count = 0; f_count < fstride; f_count ++)
548  {
549  Fout1 = & Fout[ f_count * 8 ];
550  // load
551  if (scaled_flag == 1)
552  {
553  NE10_F2I16_FIXDIV (Fin1[0], 8);
554  NE10_F2I16_FIXDIV (Fin1[fstride * 4], 8);
555  NE10_F2I16_FIXDIV (Fin1[fstride], 8);
556  NE10_F2I16_FIXDIV (Fin1[fstride * 5], 8);
557  NE10_F2I16_FIXDIV (Fin1[fstride * 2], 8);
558  NE10_F2I16_FIXDIV (Fin1[fstride * 6], 8);
559  NE10_F2I16_FIXDIV (Fin1[fstride * 3], 8);
560  NE10_F2I16_FIXDIV (Fin1[fstride * 7], 8);
561  }
562 
563  scratch_in[0].r = Fin1[0].r + Fin1[fstride * 4].r;
564  scratch_in[0].i = Fin1[0].i + Fin1[fstride * 4].i;
565  scratch_in[1].r = Fin1[0].r - Fin1[fstride * 4].r;
566  scratch_in[1].i = Fin1[0].i - Fin1[fstride * 4].i;
567  scratch_in[2].r = Fin1[fstride].r + Fin1[fstride * 5].r;
568  scratch_in[2].i = Fin1[fstride].i + Fin1[fstride * 5].i;
569  scratch_in[3].r = Fin1[fstride].r - Fin1[fstride * 5].r;
570  scratch_in[3].i = Fin1[fstride].i - Fin1[fstride * 5].i;
571  scratch_in[4].r = Fin1[fstride * 2].r + Fin1[fstride * 6].r;
572  scratch_in[4].i = Fin1[fstride * 2].i + Fin1[fstride * 6].i;
573  scratch_in[5].r = Fin1[fstride * 2].r - Fin1[fstride * 6].r;
574  scratch_in[5].i = Fin1[fstride * 2].i - Fin1[fstride * 6].i;
575  scratch_in[6].r = Fin1[fstride * 3].r + Fin1[fstride * 7].r;
576  scratch_in[6].i = Fin1[fstride * 3].i + Fin1[fstride * 7].i;
577  scratch_in[7].r = Fin1[fstride * 3].r - Fin1[fstride * 7].r;
578  scratch_in[7].i = Fin1[fstride * 3].i - Fin1[fstride * 7].i;
579 
580  // radix 4 butterfly with twiddles
581 
582  scratch[0] = scratch_in[0];
583  scratch[1] = scratch_in[1];
584 
585  scratch[2] = scratch_in[2];
586  scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[3].r - scratch_in[3].i) * TW_81) >> NE10_F2I16_SHIFT);
587  scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[3].i + scratch_in[3].r) * TW_81) >> NE10_F2I16_SHIFT);
588 
589  scratch[4] = scratch_in[4];
590  scratch[5].r = -scratch_in[5].i;
591  scratch[5].i = scratch_in[5].r;
592 
593  scratch[6].r = scratch_in[6].r;
594  scratch[6].i = scratch_in[6].i;
595  scratch[7].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[7].r + scratch_in[7].i) * TW_81N) >> NE10_F2I16_SHIFT);
596  scratch[7].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[7].i - scratch_in[7].r) * TW_81N) >> NE10_F2I16_SHIFT);
597 
598  // radix 2 butterfly
599  scratch[8].r = scratch[0].r + scratch[4].r;
600  scratch[8].i = scratch[0].i + scratch[4].i;
601  scratch[9].r = scratch[1].r + scratch[5].r;
602  scratch[9].i = scratch[1].i + scratch[5].i;
603 
604  scratch[10].r = scratch[0].r - scratch[4].r;
605  scratch[10].i = scratch[0].i - scratch[4].i;
606  scratch[11].r = scratch[1].r - scratch[5].r;
607  scratch[11].i = scratch[1].i - scratch[5].i;
608 
609  // radix 2 butterfly
610  scratch[12].r = scratch[2].r + scratch[6].r;
611  scratch[12].i = scratch[2].i + scratch[6].i;
612  scratch[13].r = scratch[3].r + scratch[7].r;
613  scratch[13].i = scratch[3].i + scratch[7].i;
614 
615  scratch[14].r = scratch[2].r - scratch[6].r;
616  scratch[14].i = scratch[2].i - scratch[6].i;
617  scratch[15].r = scratch[3].r - scratch[7].r;
618  scratch[15].i = scratch[3].i - scratch[7].i;
619 
620  // third result
621  scratch_out[4].r = scratch[8].r - scratch[12].r;
622  scratch_out[4].i = scratch[8].i - scratch[12].i;
623  scratch_out[5].r = scratch[9].r - scratch[13].r;
624  scratch_out[5].i = scratch[9].i - scratch[13].i;
625 
626  // first result
627  scratch_out[0].r = scratch[8].r + scratch[12].r;
628  scratch_out[0].i = scratch[8].i + scratch[12].i;
629  scratch_out[1].r = scratch[9].r + scratch[13].r;
630  scratch_out[1].i = scratch[9].i + scratch[13].i;
631 
632  // second result
633  scratch_out[2].r = scratch[10].r - scratch[14].i;
634  scratch_out[2].i = scratch[10].i + scratch[14].r;
635  scratch_out[3].r = scratch[11].r - scratch[15].i;
636  scratch_out[3].i = scratch[11].i + scratch[15].r;
637 
638  // fourth result
639  scratch_out[6].r = scratch[10].r + scratch[14].i;
640  scratch_out[6].i = scratch[10].i - scratch[14].r;
641  scratch_out[7].r = scratch[11].r + scratch[15].i;
642  scratch_out[7].i = scratch[11].i - scratch[15].r;
643 
644  // store
645  Fout1[0] = scratch_out[0];
646  Fout1[1] = scratch_out[1];
647  Fout1[2] = scratch_out[2];
648  Fout1[3] = scratch_out[3];
649  Fout1[4] = scratch_out[4];
650  Fout1[5] = scratch_out[5];
651  Fout1[6] = scratch_out[6];
652  Fout1[7] = scratch_out[7];
653 
654  Fin1 += 1;
655  } // f_count
656  fstride >>= 2;
657  stage_count--;
658 
659  // swap
660  Ftmp = buffer;
661  buffer = Fout;
662  Fout = Ftmp;
663  }
664  else if (N == 4) // length of FFT is 2^n (n is even)
665  {
666  //fstride is nfft>>2
667  for (f_count = fstride; f_count ; f_count --)
668  {
669  // load
670  scratch_in[0] = *Fin1;
671  Fin2 = Fin1 + fstride;
672  scratch_in[1] = *Fin2;
673  Fin2 = Fin2 + fstride;
674  scratch_in[2] = *Fin2;
675  Fin2 = Fin2 + fstride;
676  scratch_in[3] = *Fin2;
677 
678  // radix 4 butterfly without twiddles
679  if (scaled_flag == 1)
680  {
681  NE10_F2I16_FIXDIV (scratch_in[0], 4);
682  NE10_F2I16_FIXDIV (scratch_in[1], 4);
683  NE10_F2I16_FIXDIV (scratch_in[2], 4);
684  NE10_F2I16_FIXDIV (scratch_in[3], 4);
685  }
686 
687  // radix 2 butterfly
688  scratch[0].r = scratch_in[0].r + scratch_in[2].r;
689  scratch[0].i = scratch_in[0].i + scratch_in[2].i;
690 
691  scratch[1].r = scratch_in[0].r - scratch_in[2].r;
692  scratch[1].i = scratch_in[0].i - scratch_in[2].i;
693 
694  // radix 2 butterfly
695  scratch[2].r = scratch_in[1].r + scratch_in[3].r;
696  scratch[2].i = scratch_in[1].i + scratch_in[3].i;
697 
698  scratch[3].r = scratch_in[1].r - scratch_in[3].r;
699  scratch[3].i = scratch_in[1].i - scratch_in[3].i;
700 
701  // third result
702  scratch_out[2].r = scratch[0].r - scratch[2].r;
703  scratch_out[2].i = scratch[0].i - scratch[2].i;
704 
705  // first result
706  scratch_out[0].r = scratch[0].r + scratch[2].r;
707  scratch_out[0].i = scratch[0].i + scratch[2].i;
708 
709  // second result
710  scratch_out[1].r = scratch[1].r - scratch[3].i;
711  scratch_out[1].i = scratch[1].i + scratch[3].r;
712 
713  // fourth result
714  scratch_out[3].r = scratch[1].r + scratch[3].i;
715  scratch_out[3].i = scratch[1].i - scratch[3].r;
716 
717  // store
718  * Fout1 ++ = scratch_out[0];
719  * Fout1 ++ = scratch_out[1];
720  * Fout1 ++ = scratch_out[2];
721  * Fout1 ++ = scratch_out[3];
722 
723  Fin1++;
724  } // f_count
725 
726  N = fstride; // 1/4 of length of FFT
727  // update address for other stages
728  stage_count--;
729  fstride >>= 2;
730 
731  // swap
732  Ftmp = buffer;
733  buffer = Fout;
734  Fout = Ftmp;
735 
736  // end of first stage
737  }
738  else if (N == 2) // Length of FFT is 2
739  {
740  scratch_in[0] = Fin1[0];
741  scratch_in[1] = Fin1[1];
742 
743  if (scaled_flag == 1)
744  {
745  NE10_F2I16_FIXDIV (scratch_in[0], 2);
746  NE10_F2I16_FIXDIV (scratch_in[1], 2);
747  }
748 
749  Fout1[0].r = scratch_in[0].r + scratch_in[1].r;
750  Fout1[0].i = scratch_in[0].i + scratch_in[1].i;
751  Fout1[1].r = scratch_in[0].r - scratch_in[1].r;
752  Fout1[1].i = scratch_in[0].i - scratch_in[1].i;
753  return;
754  }
755  else // Length of FFT is 1
756  {
757  Fout1[0] = Fin1[0];
758  return;
759  }
760 
761 
762  // others but the last one
763  for (; stage_count > 1 ; stage_count--)
764  {
765  Fin1 = buffer;
766  for (f_count = 0; f_count < fstride; f_count ++)
767  {
768  Fout1 = & Fout[ f_count * mstride << 2 ];
769  tw1 = tw;
770  for (m_count = mstride; m_count ; m_count --)
771  {
772  // load
773  scratch_tw[0] = *tw1;
774  tw2 = tw1 + mstride;
775  scratch_tw[1] = *tw2;
776  tw2 += mstride;
777  scratch_tw[2] = *tw2;
778  scratch_in[0] = * Fin1;
779  Fin2 = Fin1 + N;
780  scratch_in[1] = * Fin2;
781  Fin2 += N;
782  scratch_in[2] = * Fin2;
783  Fin2 += N;
784  scratch_in[3] = * Fin2;
785  if (scaled_flag == 1)
786  {
787  NE10_F2I16_FIXDIV (scratch_in[0], 4);
788  NE10_F2I16_FIXDIV (scratch_in[1], 4);
789  NE10_F2I16_FIXDIV (scratch_in[2], 4);
790  NE10_F2I16_FIXDIV (scratch_in[3], 4);
791  }
792 
793  // radix 4 butterfly with twiddles
794 
795  scratch[0] = scratch_in[0];
796  scratch[1].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].r
797  + (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
798  scratch[1].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].r
799  - (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
800 
801  scratch[2].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].r
802  + (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
803  scratch[2].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].r
804  - (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
805 
806  scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].r
807  + (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
808  scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].r
809  - (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
810 
811  // radix 2 butterfly
812  scratch[4].r = scratch[0].r + scratch[2].r;
813  scratch[4].i = scratch[0].i + scratch[2].i;
814 
815  scratch[5].r = scratch[0].r - scratch[2].r;
816  scratch[5].i = scratch[0].i - scratch[2].i;
817 
818  // radix 2 butterfly
819  scratch[6].r = scratch[1].r + scratch[3].r;
820  scratch[6].i = scratch[1].i + scratch[3].i;
821 
822  scratch[7].r = scratch[1].r - scratch[3].r;
823  scratch[7].i = scratch[1].i - scratch[3].i;
824 
825  // third result
826  scratch_out[2].r = scratch[4].r - scratch[6].r;
827  scratch_out[2].i = scratch[4].i - scratch[6].i;
828 
829  // first result
830  scratch_out[0].r = scratch[4].r + scratch[6].r;
831  scratch_out[0].i = scratch[4].i + scratch[6].i;
832 
833  // second result
834  scratch_out[1].r = scratch[5].r - scratch[7].i;
835  scratch_out[1].i = scratch[5].i + scratch[7].r;
836 
837  // fourth result
838  scratch_out[3].r = scratch[5].r + scratch[7].i;
839  scratch_out[3].i = scratch[5].i - scratch[7].r;
840 
841  // store
842  *Fout1 = scratch_out[0];
843  Fout2 = Fout1 + mstride;
844  *Fout2 = scratch_out[1];
845  Fout2 += mstride;
846  *Fout2 = scratch_out[2];
847  Fout2 += mstride;
848  *Fout2 = scratch_out[3];
849 
850  tw1++;
851  Fin1 ++;
852  Fout1 ++;
853  } // m_count
854  } // f_count
855  tw += mstride * 3;
856  mstride <<= 2;
857  // swap
858  Ftmp = buffer;
859  buffer = Fout;
860  Fout = Ftmp;
861  fstride >>= 2;
862  } // stage_count
863 
864  // the last one
865  if (stage_count)
866  {
867  Fin1 = buffer;
868  // if stage count is even, output to the input array
869  Fout1 = Fout_ls;
870 
871  for (f_count = 0; f_count < fstride; f_count ++)
872  {
873  tw1 = tw;
874  for (m_count = mstride; m_count ; m_count --)
875  {
876  // load
877  scratch_tw[0] = *tw1;
878  tw2 = tw1 + mstride;
879  scratch_tw[1] = *tw2;
880  tw2 += mstride;
881  scratch_tw[2] = *tw2;
882  scratch_in[0] = * Fin1;
883  Fin2 = Fin1 + N;
884  scratch_in[1] = * Fin2;
885  Fin2 += N;
886  scratch_in[2] = * Fin2;
887  Fin2 += N;
888  scratch_in[3] = * Fin2;
889  if (scaled_flag == 1)
890  {
891  NE10_F2I16_FIXDIV (scratch_in[0], 4);
892  NE10_F2I16_FIXDIV (scratch_in[1], 4);
893  NE10_F2I16_FIXDIV (scratch_in[2], 4);
894  NE10_F2I16_FIXDIV (scratch_in[3], 4);
895  }
896 
897  // radix 4 butterfly with twiddles
898 
899  scratch[0] = scratch_in[0];
900  scratch[1].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].r
901  + (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
902  scratch[1].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].r
903  - (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
904 
905  scratch[2].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].r
906  + (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
907  scratch[2].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].r
908  - (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
909 
910  scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].r
911  + (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
912  scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].r
913  - (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
914 
915  // radix 2 butterfly
916  scratch[4].r = scratch[0].r + scratch[2].r;
917  scratch[4].i = scratch[0].i + scratch[2].i;
918 
919  scratch[5].r = scratch[0].r - scratch[2].r;
920  scratch[5].i = scratch[0].i - scratch[2].i;
921 
922  // radix 2 butterfly
923  scratch[6].r = scratch[1].r + scratch[3].r;
924  scratch[6].i = scratch[1].i + scratch[3].i;
925 
926  scratch[7].r = scratch[1].r - scratch[3].r;
927  scratch[7].i = scratch[1].i - scratch[3].i;
928 
929  // third result
930  scratch_out[2].r = scratch[4].r - scratch[6].r;
931  scratch_out[2].i = scratch[4].i - scratch[6].i;
932 
933  // first result
934  scratch_out[0].r = scratch[4].r + scratch[6].r;
935  scratch_out[0].i = scratch[4].i + scratch[6].i;
936 
937  // second result
938  scratch_out[1].r = scratch[5].r - scratch[7].i;
939  scratch_out[1].i = scratch[5].i + scratch[7].r;
940 
941  // fourth result
942  scratch_out[3].r = scratch[5].r + scratch[7].i;
943  scratch_out[3].i = scratch[5].i - scratch[7].r;
944 
945  // store
946  *Fout1 = scratch_out[0];
947  Fout2 = Fout1 + N;
948  *Fout2 = scratch_out[1];
949  Fout2 += N;
950  *Fout2 = scratch_out[2];
951  Fout2 += N;
952  *Fout2 = scratch_out[3];
953 
954  tw1 ++;
955  Fin1 ++;
956  Fout1 ++;
957  } // m_count
958  } // f_count
959  } // last stage
960 }
961 
962 static void ne10_fft_split_r2c_1d_int16 (ne10_fft_cpx_int16_t *dst,
963  const ne10_fft_cpx_int16_t *src,
964  ne10_fft_cpx_int16_t *twiddles,
965  ne10_int32_t ncfft,
966  ne10_int32_t scaled_flag)
967 {
968  ne10_int32_t k;
969  ne10_fft_cpx_int16_t fpnk, fpk, f1k, f2k, tw, tdc;
970 
971  tdc.r = src[0].r;
972  tdc.i = src[0].i;
973 
974  if (scaled_flag)
975  NE10_F2I16_FIXDIV (tdc, 2);
976 
977  dst[0].r = tdc.r + tdc.i;
978  dst[ncfft].r = tdc.r - tdc.i;
979  dst[ncfft].i = dst[0].i = 0;
980 
981  for (k = 1; k <= ncfft / 2 ; ++k)
982  {
983  fpk = src[k];
984  fpnk.r = src[ncfft - k].r;
985  fpnk.i = - src[ncfft - k].i;
986  if (scaled_flag)
987  {
988  NE10_F2I16_FIXDIV (fpk, 2);
989  NE10_F2I16_FIXDIV (fpnk, 2);
990  }
991 
992  f1k.r = fpk.r + fpnk.r;
993  f1k.i = fpk.i + fpnk.i;
994 
995  f2k.r = fpk.r - fpnk.r;
996  f2k.i = fpk.i - fpnk.i;
997 
998  tw.r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) f2k.r * (twiddles[k - 1]).r
999  - (NE10_F2I16_SAMPPROD) f2k.i * (twiddles[k - 1]).i) >> NE10_F2I16_SHIFT);
1000  tw.i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) f2k.r * (twiddles[k - 1]).i
1001  + (NE10_F2I16_SAMPPROD) f2k.i * (twiddles[k - 1]).r) >> NE10_F2I16_SHIFT);
1002 
1003  dst[k].r = (f1k.r + tw.r) >> 1;
1004  dst[k].i = (f1k.i + tw.i) >> 1;
1005  dst[ncfft - k].r = (f1k.r - tw.r) >> 1;
1006  dst[ncfft - k].i = (tw.i - f1k.i) >> 1;
1007  }
1008 }
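For reference, the recombination above can be written compactly in floating point. The sketch below (a hypothetical split_r2c_ref helper, not part of Ne10) mirrors ne10_fft_split_r2c_1d_int16 without the Q15 scaling: f[] is the ncfft-point complex FFT of the even/odd packed real input, dst[] receives the ncfft + 1 output bins, and the super twiddle exp(-i*pi*(k/ncfft + 0.5)) matches the table generated in ne10_fft_alloc_r2c_int16 further down.

/* Floating-point reference for the r2c split step above (illustration only). */
#include <complex.h>
#include <math.h>

static void split_r2c_ref (double complex *dst, const double complex *f, int ncfft)
{
    const double pi = acos (-1.0);
    dst[0]     = creal (f[0]) + cimag (f[0]);      /* DC bin, purely real      */
    dst[ncfft] = creal (f[0]) - cimag (f[0]);      /* Nyquist bin, purely real */
    for (int k = 1; k <= ncfft / 2; k++)
    {
        double complex fpk  = f[k];
        double complex fpnk = conj (f[ncfft - k]);
        double complex f1k  = fpk + fpnk;          /* "even" combination, as f1k above */
        double complex f2k  = fpk - fpnk;          /* "odd" combination, as f2k above  */
        /* super twiddle exp(-i*pi*(k/ncfft + 0.5)), matching the alloc routine below */
        double complex tw   = cexp (-I * pi * ((double) k / ncfft + 0.5)) * f2k;
        dst[k]         = (f1k + tw) / 2.0;
        dst[ncfft - k] = conj (f1k - tw) / 2.0;
    }
}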
1009 
1010 static void ne10_fft_split_c2r_1d_int16 (ne10_fft_cpx_int16_t *dst,
1011  const ne10_fft_cpx_int16_t *src,
1012  ne10_fft_cpx_int16_t *twiddles,
1013  ne10_int32_t ncfft,
1014  ne10_int32_t scaled_flag)
1015 {
1016 
1017  ne10_int32_t k;
1018  ne10_fft_cpx_int16_t fk, fnkc, fek, fok, tmp;
1019 
1020 
1021  dst[0].r = src[0].r + src[ncfft].r;
1022  dst[0].i = src[0].r - src[ncfft].r;
1023 
1024  if (scaled_flag)
1025  NE10_F2I16_FIXDIV (dst[0], 2);
1026 
1027  for (k = 1; k <= ncfft / 2; k++)
1028  {
1029  fk = src[k];
1030  fnkc.r = src[ncfft - k].r;
1031  fnkc.i = -src[ncfft - k].i;
1032  if (scaled_flag)
1033  {
1034  NE10_F2I16_FIXDIV (fk, 2);
1035  NE10_F2I16_FIXDIV (fnkc, 2);
1036  }
1037 
1038  fek.r = fk.r + fnkc.r;
1039  fek.i = fk.i + fnkc.i;
1040 
1041  tmp.r = fk.r - fnkc.r;
1042  tmp.i = fk.i - fnkc.i;
1043 
1044  fok.r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) tmp.r * (twiddles[k - 1]).r
1045  + (NE10_F2I16_SAMPPROD) tmp.i * (twiddles[k - 1]).i) >> NE10_F2I16_SHIFT);
1046  fok.i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) tmp.i * (twiddles[k - 1]).r
1047  - (NE10_F2I16_SAMPPROD) tmp.r * (twiddles[k - 1]).i) >> NE10_F2I16_SHIFT);
1048 
1049  dst[k].r = fek.r + fok.r;
1050  dst[k].i = fek.i + fok.i;
1051 
1052  dst[ncfft - k].r = fek.r - fok.r;
1053  dst[ncfft - k].i = fok.i - fek.i;
1054  }
1055 }
1056 
1071 ne10_fft_cfg_int16_t ne10_fft_alloc_c2c_int16 (ne10_int32_t nfft)
1072 {
1073  ne10_fft_cfg_int16_t st = NULL;
1074  ne10_uint32_t memneeded = sizeof (ne10_fft_state_int16_t)
1075  + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors */
1076  + sizeof (ne10_fft_cpx_int16_t) * (nfft) /* twiddles */
1077  + sizeof (ne10_fft_cpx_int16_t) * nfft /* buffer */
1078  + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment */
1079 
1080  st = (ne10_fft_cfg_int16_t) NE10_MALLOC (memneeded);
1081 
1082  if (st)
1083  {
1084  uintptr_t address = (uintptr_t) st + sizeof (ne10_fft_state_int16_t);
1085  NE10_BYTE_ALIGNMENT (address, NE10_FFT_BYTE_ALIGNMENT);
1086  st->factors = (ne10_int32_t*) address;
1087  st->twiddles = (ne10_fft_cpx_int16_t*) (st->factors + (NE10_MAXFACTORS * 2));
1088  st->buffer = st->twiddles + nfft;
1089  st->nfft = nfft;
1090 
1091  ne10_int32_t result = ne10_factor (nfft, st->factors, NE10_FACTOR_EIGHT_FIRST_STAGE);
1092  if (result == NE10_ERR)
1093  {
1094  NE10_FREE (st);
1095  return NULL;
1096  }
1097 
1098  ne10_int32_t j, k;
1099  ne10_int32_t *factors = st->factors;
1100  ne10_fft_cpx_int16_t *twiddles = st->twiddles;
1101  ne10_int32_t stage_count = factors[0];
1102  ne10_int32_t fstride = factors[1];
1103  ne10_int32_t mstride;
1104  ne10_int32_t cur_radix;
1105  ne10_float32_t phase;
1106  const ne10_float32_t pi = NE10_PI;
1107 
1108  // Don't generate any twiddles for the first stage
1109  stage_count --;
1110 
1111  // Generate twiddles for the other stages
1112  for (; stage_count > 0; stage_count --)
1113  {
1114  cur_radix = factors[2 * stage_count];
1115  fstride /= cur_radix;
1116  mstride = factors[2 * stage_count + 1];
1117  for (j = 0; j < mstride; j++)
1118  {
1119  for (k = 1; k < cur_radix; k++) // phase = 1 when k = 0
1120  {
1121  phase = -2 * pi * fstride * k * j / nfft;
1122  twiddles[mstride * (k - 1) + j].r = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * cos (phase));
1123  twiddles[mstride * (k - 1) + j].i = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * sin (phase));
1124  }
1125  }
1126  twiddles += mstride * (cur_radix - 1);
1127  }
1128  }
1129 
1130  return st;
1131 }
1132 
1137 void ne10_fft_c2c_1d_int16_c (ne10_fft_cpx_int16_t *fout,
1138  ne10_fft_cpx_int16_t *fin,
1139  ne10_fft_cfg_int16_t cfg,
1140  ne10_int32_t inverse_fft,
1141  ne10_int32_t scaled_flag)
1142 {
1143  if (inverse_fft)
1144  ne10_mixed_radix_butterfly_inverse_int16_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer, scaled_flag);
1145  else
1146  ne10_mixed_radix_butterfly_int16_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer, scaled_flag);
1147 }
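A minimal usage sketch for the complex-to-complex int16 path is shown below. The 1024-point size, the buffers, and the "NE10_dsp.h" include are illustrative assumptions; only ne10_fft_alloc_c2c_int16, ne10_fft_c2c_1d_int16_c and NE10_FREE come from the code above.

#include "NE10_types.h"
#include "NE10_dsp.h"                               /* assumed public header for this API */

void example_c2c_int16 (void)
{
    const ne10_int32_t nfft = 1024;                 /* power-of-two transform size         */
    ne10_fft_cpx_int16_t in[1024], out[1024];       /* Q15 complex samples                 */

    ne10_fft_cfg_int16_t cfg = ne10_fft_alloc_c2c_int16 (nfft);
    if (cfg == NULL)
        return;                                     /* unsupported size or allocation failure */

    /* ... fill in[] ... */

    ne10_fft_c2c_1d_int16_c (out, in, cfg, 0, 1);   /* forward, with per-stage scaling     */
    ne10_fft_c2c_1d_int16_c (in, out, cfg, 1, 1);   /* inverse, with per-stage scaling     */

    NE10_FREE (cfg);                                /* cfg is one NE10_MALLOC block (see alloc above) */
}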
1148 
1163 ne10_fft_r2c_cfg_int16_t ne10_fft_alloc_r2c_int16 (ne10_int32_t nfft)
1164 {
1165  ne10_fft_r2c_cfg_int16_t st = NULL;
1166  ne10_int32_t ncfft = nfft >> 1;
1167 
1168  ne10_uint32_t memneeded = sizeof (ne10_fft_r2c_state_int16_t)
1169  + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors */
1170  + sizeof (ne10_fft_cpx_int16_t) * ncfft /* twiddles */
1171  + sizeof (ne10_fft_cpx_int16_t) * ncfft / 2 /* super twiddles */
1172  + sizeof (ne10_fft_cpx_int32_t) * nfft /* buffer */
1173  + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment */
1174 
1175  st = (ne10_fft_r2c_cfg_int16_t) NE10_MALLOC (memneeded);
1176 
1177  if (st)
1178  {
1179  uintptr_t address = (uintptr_t) st + sizeof (ne10_fft_r2c_state_int16_t);
1180  NE10_BYTE_ALIGNMENT (address, NE10_FFT_BYTE_ALIGNMENT);
1181  st->factors = (ne10_int32_t*) address;
1182  st->twiddles = (ne10_fft_cpx_int16_t*) (st->factors + (NE10_MAXFACTORS * 2));
1183  st->super_twiddles = st->twiddles + ncfft;
1184  st->buffer = st->super_twiddles + (ncfft / 2);
1185  st->ncfft = ncfft;
1186 
1187  ne10_int32_t result = ne10_factor (ncfft, st->factors, NE10_FACTOR_EIGHT_FIRST_STAGE);
1188  if (result == NE10_ERR)
1189  {
1190  NE10_FREE (st);
1191  return NULL;
1192  }
1193 
1194  ne10_int32_t j, k;
1195  ne10_int32_t *factors = st->factors;
1196  ne10_fft_cpx_int16_t *twiddles = st->twiddles;
1197  ne10_int32_t stage_count = factors[0];
1198  ne10_int32_t fstride = factors[1];
1199  ne10_int32_t mstride;
1200  ne10_int32_t cur_radix;
1201  ne10_float32_t phase;
1202  const ne10_float32_t pi = NE10_PI;
1203 
1204  // Don't generate any twiddles for the first stage
1205  stage_count --;
1206 
1207  // Generate twiddles for the other stages
1208  for (; stage_count > 0; stage_count --)
1209  {
1210  cur_radix = factors[2 * stage_count];
1211  fstride /= cur_radix;
1212  mstride = factors[2 * stage_count + 1];
1213  for (j = 0; j < mstride; j++)
1214  {
1215  for (k = 1; k < cur_radix; k++) // phase = 1 when k = 0
1216  {
1217  phase = -2 * pi * fstride * k * j / ncfft;
1218  twiddles[mstride * (k - 1) + j].r = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * cos (phase));
1219  twiddles[mstride * (k - 1) + j].i = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * sin (phase));
1220  }
1221  }
1222  twiddles += mstride * (cur_radix - 1);
1223  }
1224 
1225  twiddles = st->super_twiddles;
1226  for (j = 0; j < ncfft / 2; j++)
1227  {
1228  phase = -pi * ( (ne10_float32_t) (j + 1) / ncfft + 0.5f);
1229  twiddles->r = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * cos (phase));
1230  twiddles->i = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * sin (phase));
1231  twiddles++;
1232  }
1233  }
1234 
1235  return st;
1236 }
1237 
1242 void ne10_fft_r2c_1d_int16_c (ne10_fft_cpx_int16_t *fout,
1243  ne10_int16_t *fin,
1244  ne10_fft_r2c_cfg_int16_t cfg,
1245  ne10_int32_t scaled_flag)
1246 {
1247  ne10_fft_cpx_int16_t * tmpbuf = cfg->buffer;
1248 
1249  ne10_mixed_radix_butterfly_int16_c (tmpbuf, (ne10_fft_cpx_int16_t*) fin, cfg->factors, cfg->twiddles, fout, scaled_flag);
1250  ne10_fft_split_r2c_1d_int16 (fout, tmpbuf, cfg->super_twiddles, cfg->ncfft, scaled_flag);
1251 }
1252 
1257 void ne10_fft_c2r_1d_int16_c (ne10_int16_t *fout,
1258  ne10_fft_cpx_int16_t *fin,
1259  ne10_fft_r2c_cfg_int16_t cfg,
1260  ne10_int32_t scaled_flag)
1261 {
1262  ne10_fft_cpx_int16_t * tmpbuf1 = cfg->buffer;
1263  ne10_fft_cpx_int16_t * tmpbuf2 = cfg->buffer + cfg->ncfft;
1264 
1265  ne10_fft_split_c2r_1d_int16 (tmpbuf1, fin, cfg->super_twiddles, cfg->ncfft, scaled_flag);
1266  ne10_mixed_radix_butterfly_inverse_int16_c ( (ne10_fft_cpx_int16_t*) fout, tmpbuf1, cfg->factors, cfg->twiddles, tmpbuf2, scaled_flag);
1267 }
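A corresponding sketch for the real-to-complex and complex-to-real pair follows; again, the 512-point size, the buffers, and the "NE10_dsp.h" include are assumptions for illustration.

#include "NE10_types.h"
#include "NE10_dsp.h"                               /* assumed public header for this API */

void example_r2c_int16 (void)
{
    const ne10_int32_t nfft = 512;                  /* even; the complex core runs at ncfft = nfft/2 */
    ne10_int16_t time[512];                         /* Q15 real samples                    */
    ne10_fft_cpx_int16_t freq[512 / 2 + 1];         /* ncfft + 1 spectrum bins             */

    ne10_fft_r2c_cfg_int16_t cfg = ne10_fft_alloc_r2c_int16 (nfft);
    if (cfg == NULL)
        return;

    /* ... fill time[] ... */

    ne10_fft_r2c_1d_int16_c (freq, time, cfg, 1);   /* forward, scaled */
    ne10_fft_c2r_1d_int16_c (time, freq, cfg, 1);   /* inverse, scaled */

    NE10_FREE (cfg);
}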