Project Ne10
An open, optimized software library for the ARM architecture.
NE10_fft_generic_int32.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2015-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /* license of Kiss FFT */
29 /*
30 Copyright (c) 2003-2010, Mark Borgerding
31 
32 All rights reserved.
33 
34 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
35 
36  * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
37  * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
38  * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
39 
40 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 */
42 
43 /*
44  * NE10 Library : dsp/NE10_fft_generic_int32.cpp
45  */
46 
47 #include "NE10_types.h"
48 #include "NE10_macros.h"
49 #include "NE10_fft.h"
50 #include "NE10_fft_generic_int32.h"
51 
68 template<int RADIX, bool is_first_stage, bool is_inverse, bool is_scaled>
70  const ne10_fft_cpx_int32_t *Fin,
71  const ne10_fft_cpx_int32_t *twiddles,
72  const ne10_int32_t fstride,
73  const ne10_int32_t out_step,
74  const ne10_int32_t nfft)
75 {
76  const ne10_int32_t in_step = nfft / RADIX;
77  ne10_int32_t f_count;
78  ne10_int32_t m_count;
79 
80  for (f_count = fstride; f_count > 0; f_count--)
81  {
82  for (m_count = out_step; m_count > 0; m_count--)
83  {
84  ne10_fft_cpx_int32_t scratch_in[RADIX];
85  ne10_fft_cpx_int32_t scratch_out[RADIX];
86 
87  // Load from input buffer.
88  NE10_LOAD_BY_STEP <RADIX> (scratch_in, Fin, in_step);
89 
90  if (is_inverse)
91  {
92  // Conjugate all elements in scratch_in.
93  NE10_CONJ<RADIX> (scratch_in);
94  }
95 
96  if (is_scaled)
97  {
98  // All elements in scratch_in are divided by radix of this stage.
99  NE10_SCALED<RADIX> (scratch_in, RADIX);
100  }
101 
102  if (!is_first_stage)
103  {
104  // Multiply twiddles for all stages but the first one.
105  ne10_fft_cpx_int32_t scratch_tw[RADIX - 1];
106  ne10_fft_cpx_int32_t scratch[RADIX];
107 
108  // Load twiddles from twiddles buffer.
109  NE10_LOAD_BY_STEP<RADIX - 1> (scratch_tw, twiddles, out_step);
110 
111  FFT_MUL_TW<RADIX> (scratch, scratch_in, scratch_tw);
112 
113  // Copy from temp buff scratch to scratch_in.
114  NE10_LOAD_BY_STEP<RADIX> (scratch_in, scratch, 1);
115  }
116 
117  // Radix -2, -3, -4 or -5 butterfly
118  // From scratch_in to scratch_out.
119  FFT_FCU<RADIX> (scratch_out, scratch_in);
120 
121  if (is_inverse)
122  {
123  // Conjugate all elements in scratch_out.
124  NE10_CONJ<RADIX> (scratch_out);
125  }
126 
127  // Store to output buffer.
128  NE10_STORE_BY_STEP<RADIX> (Fout, scratch_out, out_step);
129 
130  // Update input, output and twiddles pointers.
131  Fin++;
132  if (!is_first_stage)
133  {
134  Fout++;
135  twiddles++;
136  }
137  else
138  {
139  Fout += RADIX;
140  }
141  }
142  if (!is_first_stage)
143  {
144  // Roll back twiddles.
145  twiddles -= out_step;
146  // Next output groups.
147  Fout += (RADIX - 1) * out_step;
148  }
149  }
150 }
151 
164 template<bool is_inverse, bool is_scaled>
165 static inline void ne10_radix_generic_butterfly_int32_c (ne10_fft_cpx_int32_t *Fout,
166  const ne10_fft_cpx_int32_t *Fin,
167  const ne10_fft_cpx_int32_t *twiddles,
168  const ne10_int32_t radix,
169  const ne10_int32_t in_step,
170  const ne10_int32_t out_step)
171 {
172  ne10_int32_t q, q1;
173  ne10_int32_t f_count = in_step;
174 
176  ne10_fft_cpx_int32_t *scratch;
177  scratch = (ne10_fft_cpx_int32_t *) NE10_MALLOC (radix *
178  sizeof (ne10_fft_cpx_int32_t));
179 
180  for (; f_count > 0; f_count--)
181  {
182  // load
183  for (q1 = 0; q1 < radix; q1++)
184  {
185  scratch[q1] = Fin[in_step * q1];
186  if (is_inverse)
187  {
188  scratch[q1].i = -scratch[q1].i;
189  }
190  if (is_scaled)
191  {
192  NE10_F2I32_FIXDIV (scratch[q1], radix);
193  }
194  } // q1
195 
196  // compute Fout[q1 * out_step] from definition
197  for (q1 = 0; q1 < radix; q1++)
198  {
199  ne10_int32_t twidx = 0;
200  Fout[q1 * out_step] = scratch[0];
201  for (q = 1; q < radix; q++)
202  {
203  twidx += 1 * q1;
204  if (twidx >= radix)
205  {
206  twidx -= radix;
207  }
208  NE10_CPX_MUL_F32 (tmp, scratch[q], twiddles[twidx]);
209  NE10_CPX_ADDTO (Fout[q1 * out_step], tmp);
210  } // q
211  if (is_inverse)
212  {
213  Fout[q1 * out_step].i = -Fout[q1 * out_step].i;
214  }
215  } // q1
216 
217  Fout += radix;
218  Fin++;
219  }
220 
221  NE10_FREE (scratch);
222 }
223 
234 template<bool is_inverse, bool is_scaled>
236  const ne10_fft_cpx_int32_t *Fin,
237  const ne10_int32_t *factors,
238  const ne10_fft_cpx_int32_t *twiddles,
239  ne10_fft_cpx_int32_t *buffer)
240 {
241  ne10_int32_t fstride, mstride, radix;
242  ne10_int32_t stage_count;
243  ne10_int32_t nfft;
244 
245  // init fstride, mstride, radix, nfft
246  stage_count = factors[0];
247  fstride = factors[1];
248  mstride = 1;
249  radix = factors[stage_count << 1]; // radix of first stage
250  nfft = fstride * radix;
251 
252  if (stage_count % 2 == 0)
253  {
254  ne10_swap_ptr (buffer, Fout);
255  }
256 
257  // first stage
258  switch (radix)
259  {
260  case 2:
261  ne10_radix_butterfly_int32_c<2, true, is_inverse, is_scaled> (Fout, Fin,
262  NULL, // Twiddles are not used for first stage.
263  fstride, 1, nfft);
264  break;
265  case 4:
266  ne10_radix_butterfly_int32_c<4, true, is_inverse, is_scaled> (Fout, Fin,
267  NULL, // Same as above.
268  fstride, 1, nfft);
269  break;
270  case 3:
271  ne10_radix_butterfly_int32_c<3, true, is_inverse, is_scaled> (Fout, Fin,
272  NULL, // Same as above.
273  fstride, 1, nfft);
274  break;
275  case 5:
276  ne10_radix_butterfly_int32_c<5, true, is_inverse, is_scaled> (Fout, Fin,
277  NULL, // Same as above.
278  fstride, 1, nfft);
279  break;
280  default:
281  ne10_radix_generic_butterfly_int32_c<is_inverse, is_scaled> (Fout, Fin,
282  twiddles, // Twiddles for butterfly.
283  radix, fstride, 1);
284  break;
285  }
286 
287  stage_count--;
288  if (!stage_count) // finish
289  {
290  return;
291  }
292 
293  if (radix % 2)
294  {
295  twiddles += radix;
296  }
297 
298  // other stages
299  while (stage_count > 0)
300  {
301  ne10_swap_ptr (buffer, Fout);
302  mstride *= radix;
303 
304  // update radix
305  radix = factors[stage_count << 1];
306  assert ((radix > 1) && (radix < 6));
307 
308  fstride /= radix;
309  switch (radix)
310  {
311  case 2:
312  ne10_radix_butterfly_int32_c<2, false, is_inverse, is_scaled> (Fout,
313  buffer, twiddles, fstride, mstride, nfft);
314  break;
315  case 3:
316  ne10_radix_butterfly_int32_c<3, false, is_inverse, is_scaled> (Fout,
317  buffer, twiddles, fstride, mstride, nfft);
318  break;
319  case 4:
320  ne10_radix_butterfly_int32_c<4, false, is_inverse, is_scaled> (Fout,
321  buffer, twiddles, fstride, mstride, nfft);
322  break;
323  case 5:
324  ne10_radix_butterfly_int32_c<5, false, is_inverse, is_scaled> (Fout,
325  buffer, twiddles, fstride, mstride, nfft);
326  break;
327  } // switch (radix)
328 
329  twiddles += mstride * (radix - 1);
330  stage_count--;
331  } // while (stage_count)
332 }
333 
344  const ne10_fft_cpx_int32_t *Fin,
345  const ne10_int32_t *factors,
346  const ne10_fft_cpx_int32_t *twiddles,
347  ne10_fft_cpx_int32_t *buffer,
348  const ne10_int32_t is_scaled)
349 {
350  const bool is_inverse = false;
351  if (is_scaled)
352  {
353  const bool is_scaled_flag = true;
355  is_scaled_flag> (Fout, Fin, factors, twiddles, buffer);
356  }
357  else
358  {
359  const bool is_scaled_flag = false;
361  is_scaled_flag> (Fout, Fin, factors, twiddles, buffer);
362  }
363 }
364 
375  ne10_fft_cpx_int32_t *Fout,
376  const ne10_fft_cpx_int32_t *Fin,
377  const ne10_int32_t *factors,
378  const ne10_fft_cpx_int32_t *twiddles,
379  ne10_fft_cpx_int32_t *buffer,
380  const ne10_int32_t is_scaled)
381 {
382  const bool is_inverse = true;
383  if (is_scaled)
384  {
385  const bool is_scaled_flag = true;
387  is_scaled_flag> (Fout, Fin, factors, twiddles, buffer);
388  }
389  else
390  {
391  const bool is_scaled_flag = false;
393  is_scaled_flag> (Fout, Fin, factors, twiddles, buffer);
394  }
395 }
void ne10_mixed_radix_generic_butterfly_int32_impl_c(ne10_fft_cpx_int32_t *Fout, const ne10_fft_cpx_int32_t *Fin, const ne10_int32_t *factors, const ne10_fft_cpx_int32_t *twiddles, ne10_fft_cpx_int32_t *buffer)
Generic FFT function for 32-bit fixed point.
int32_t ne10_int32_t
Definition: NE10_types.h:76
#define ne10_swap_ptr(X, Y)
void ne10_mixed_radix_generic_butterfly_int32_c(ne10_fft_cpx_int32_t *Fout, const ne10_fft_cpx_int32_t *Fin, const ne10_int32_t *factors, const ne10_fft_cpx_int32_t *twiddles, ne10_fft_cpx_int32_t *buffer, const ne10_int32_t is_scaled)
Generic (forward) FFT function for 32-bit fixed point.
void ne10_radix_butterfly_int32_c(ne10_fft_cpx_int32_t *Fout, const ne10_fft_cpx_int32_t *Fin, const ne10_fft_cpx_int32_t *twiddles, const ne10_int32_t fstride, const ne10_int32_t out_step, const ne10_int32_t nfft)
Generic butterfly function for 32-bit fixed point.
#define NE10_CPX_ADDTO(Z, X)
#define NE10_F2I32_FIXDIV(c, div)
Definition: NE10_macros.h:87
void ne10_mixed_radix_generic_butterfly_inverse_int32_c(ne10_fft_cpx_int32_t *Fout, const ne10_fft_cpx_int32_t *Fin, const ne10_int32_t *factors, const ne10_fft_cpx_int32_t *twiddles, ne10_fft_cpx_int32_t *buffer, const ne10_int32_t is_scaled)
Generic IFFT function for 32-bit fixed point.
#define NE10_FREE(p)
Definition: NE10_macros.h:54
Structure for the 32-bit fixed point FFT function.
Definition: NE10_types.h:325
ne10_int32_t i
Definition: NE10_types.h:328
void NE10_LOAD_BY_STEP(T out[RADIX], const T *Fin, const ne10_int32_t in_step)
Load a fixed-size array from given buffer, by given step.
#define NE10_MALLOC
Definition: NE10_macros.h:53
#define NE10_CPX_MUL_F32(Z, A, B)