Project Ne10
An open, optimized software library for the ARM architecture.
NE10_fft.h
Go to the documentation of this file.
1 /*
2  * Copyright 2013-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : dsp/NE10_fft.h
30  */
31 
32 #include "NE10_dsp.h"
33 #include "NE10_types.h"
35 #include "NE10_fft_cplx_ops.h"
36 #include "NE10_fft_bfly.h"
37 #include "NE10_fft_debug_macro.h"
38 
39 #ifndef NE10_FFT_H
40 #define NE10_FFT_H
41 
43 // Internal macro definitions
45 #define NE10_FFT_BYTE_ALIGNMENT 8
46 #define NE10_INLINE inline static
47 
48 /*
49  * FFT Algorithm Flags
50  *
51  * These are used within Ne10 to decide, after factoring an FFT into stages, what
52  * FFT algorithm should be used.
53  *
54  * - NE10_FFT_ALG_DEFAULT is a mixed radix 2/4 algorithm.
55  * - NE10_FFT_ALG_ANY is designated specifically for non-power-of-two input sizes.
56  */
57 #define NE10_FFT_ALG_DEFAULT 0
58 #define NE10_FFT_ALG_ANY 1
59 
60 /*
61  * FFT Factor Flags
62  *
63  * These are used within Ne10 to decide how an input FFT size should be factored into
64  * stages (i.e. what radices should be used).
65  *
66  * - NE10_FACTOR_DEFAULT factors into 2, 3, 4, 5.
67  * - NE10_FACTOR_EIGHT_FIRST_STAGE is NE10_FACTOR_DEFAULT with the extended ability to
68  * have a radix-8 initial stage.
69  * - NE10_FACTOR_EIGHT factors into 2, 3, 4, 5, 8.
70  */
71 #define NE10_FACTOR_DEFAULT 0
72 #define NE10_FACTOR_EIGHT_FIRST_STAGE 1
73 #define NE10_FACTOR_EIGHT 2
74 
75 // Comment out the following defines if the output result should not be scaled
76 #define NE10_DSP_RFFT_SCALING
77 #define NE10_DSP_CFFT_SCALING
78 
79 #define NE10_FFT_PARA_LEVEL 4
80 
81 #ifdef __cplusplus
82 extern "C" {
83 #endif
84 
86 // function prototypes:
88 
89  /*common fft functions */
/*
 * ne10_factor: takes an integer n, a writable ne10_int32_t buffer (facbuf)
 * and an integer flags argument, and returns an ne10_int32_t.
 *
 * NOTE(review): ne10_factor_flags is presumably one of the NE10_FACTOR_*
 * values defined earlier in this header, and facbuf presumably receives the
 * stage factorisation of n -- confirm against the definition in NE10_fft.c.
 * The meaning of the return value is not visible from this header.
 */
90  extern ne10_int32_t ne10_factor (ne10_int32_t n, ne10_int32_t * facbuf, ne10_int32_t ne10_factor_flags);
91 
93  const ne10_int32_t * factors,
94  const ne10_int32_t nfft );
95 
97  ne10_fft_cpx_float32_t * twiddles,
98  const ne10_int32_t * factors,
99  const ne10_int32_t nfft );
100 
102  const ne10_int32_t * factors,
103  const ne10_int32_t nfft );
104 
105  /*common functions for float fft */
107  const ne10_fft_cpx_float32_t * Fin,
108  const ne10_int32_t * factors,
109  const ne10_fft_cpx_float32_t * twiddles,
110  ne10_fft_cpx_float32_t * buffer,
111  const ne10_int32_t is_scaled);
112 
114  const ne10_fft_cpx_float32_t * Fin,
115  const ne10_int32_t * factors,
116  const ne10_fft_cpx_float32_t * twiddles,
117  ne10_fft_cpx_float32_t * buffer,
118  const ne10_int32_t is_scaled);
119 
121  const ne10_fft_cpx_float32_t * Fin,
122  const ne10_int32_t * factors,
123  const ne10_fft_cpx_float32_t * twiddles,
124  ne10_fft_cpx_float32_t * buffer,
125  const ne10_int32_t is_scaled);
126 
128  const ne10_fft_cpx_float32_t * Fin,
129  const ne10_int32_t * factors,
130  const ne10_fft_cpx_float32_t * twiddles,
131  ne10_fft_cpx_float32_t * buffer,
132  const ne10_int32_t is_scaled);
133 
136  ne10_int32_t * factors,
137  ne10_fft_cpx_float32_t * twiddles,
138  ne10_fft_cpx_float32_t * buffer)
139  asm ("ne10_mixed_radix_fft_forward_float32_neon");
140 
143  ne10_int32_t * factors,
144  ne10_fft_cpx_float32_t * twiddles,
145  ne10_fft_cpx_float32_t * buffer)
146  asm ("ne10_mixed_radix_fft_backward_float32_neon");
147 
148  /* common functions for fixed point fft */
149  /* butterfly for int 32 */
151  const ne10_fft_cpx_int32_t * Fin,
152  const ne10_int32_t * factors,
153  const ne10_fft_cpx_int32_t * twiddles,
154  ne10_fft_cpx_int32_t * buffer,
155  const ne10_int32_t scaled_flag);
156 
158  const ne10_fft_cpx_int32_t * Fin,
159  const ne10_int32_t * factors,
160  const ne10_fft_cpx_int32_t * twiddles,
161  ne10_fft_cpx_int32_t * buffer,
162  const ne10_int32_t scaled_flag);
163 
165  const ne10_fft_cpx_int32_t * Fin,
166  const ne10_int32_t * factors,
167  const ne10_fft_cpx_int32_t * twiddles,
168  ne10_fft_cpx_int32_t * buffer,
169  const ne10_int32_t scaled_flag);
170 
172  const ne10_fft_cpx_int32_t * Fin,
173  const ne10_int32_t * factors,
174  const ne10_fft_cpx_int32_t * twiddles,
175  ne10_fft_cpx_int32_t * buffer,
176  const ne10_int32_t scaled_flag);
177 
180  ne10_int32_t * factors,
181  ne10_fft_cpx_int32_t * twiddles,
182  ne10_fft_cpx_int32_t * buffer)
183  asm ("ne10_mixed_radix_fft_forward_int32_unscaled_neon");
186  ne10_int32_t * factors,
187  ne10_fft_cpx_int32_t * twiddles,
188  ne10_fft_cpx_int32_t * buffer)
189  asm ("ne10_mixed_radix_fft_backward_int32_unscaled_neon");
192  ne10_int32_t * factors,
193  ne10_fft_cpx_int32_t * twiddles,
194  ne10_fft_cpx_int32_t * buffer)
195  asm ("ne10_mixed_radix_fft_forward_int32_scaled_neon");
198  ne10_int32_t * factors,
199  ne10_fft_cpx_int32_t * twiddles,
200  ne10_fft_cpx_int32_t * buffer)
201  asm ("ne10_mixed_radix_fft_backward_int32_scaled_neon");
202 
203  /* butterfly for int 16 */
206  ne10_int32_t * factors,
207  ne10_fft_cpx_int16_t * twiddles,
208  ne10_fft_cpx_int16_t * buffer)
209  asm ("ne10_mixed_radix_fft_forward_int16_unscaled_neon");
212  ne10_int32_t * factors,
213  ne10_fft_cpx_int16_t * twiddles,
214  ne10_fft_cpx_int16_t * buffer)
215  asm ("ne10_mixed_radix_fft_backward_int16_unscaled_neon");
218  ne10_int32_t * factors,
219  ne10_fft_cpx_int16_t * twiddles,
220  ne10_fft_cpx_int16_t * buffer)
221  asm ("ne10_mixed_radix_fft_forward_int16_scaled_neon");
224  ne10_int32_t * factors,
225  ne10_fft_cpx_int16_t * twiddles,
226  ne10_fft_cpx_int16_t * buffer)
227  asm ("ne10_mixed_radix_fft_backward_int16_scaled_neon");
228 
229 #if (NE10_UNROLL_LEVEL > 0)
/*
 * ne10_radix2_r2c_c: only declared when NE10_UNROLL_LEVEL > 0.
 * Fout is a writable complex float32 buffer; Fin is read-only.
 * NOTE(review): the name suggests a radix-2 real-to-complex stage in plain C
 * -- confirm against the implementation.
 */
230  extern void ne10_radix2_r2c_c (ne10_fft_cpx_float32_t *Fout,
231  const ne10_fft_cpx_float32_t *Fin);
232 
/*
 * ne10_radix2_c2r_c: only declared when NE10_UNROLL_LEVEL > 0.
 * Fout is a writable complex float32 buffer; Fin is read-only.
 * NOTE(review): the name suggests a radix-2 complex-to-real stage in plain C
 * -- confirm against the implementation.
 */
233  extern void ne10_radix2_c2r_c (ne10_fft_cpx_float32_t *Fout,
234  const ne10_fft_cpx_float32_t *Fin);
235 
/*
 * ne10_radix4_r2c_c: only declared when NE10_UNROLL_LEVEL > 0.
 * Fout is a writable complex float32 buffer; Fin is read-only; fstride,
 * mstride and nfft are passed by value as ne10_int32_t.
 * NOTE(review): the name suggests a radix-4 real-to-complex stage in plain C;
 * the exact meaning of fstride/mstride is not visible here -- confirm against
 * the implementation.
 */
236  extern void ne10_radix4_r2c_c (ne10_fft_cpx_float32_t *Fout,
237  const ne10_fft_cpx_float32_t *Fin,
238  const ne10_int32_t fstride,
239  const ne10_int32_t mstride,
240  const ne10_int32_t nfft);
241 
/*
 * ne10_radix4_c2r_c: only declared when NE10_UNROLL_LEVEL > 0.
 * Fout is a writable complex float32 buffer; Fin is read-only; fstride,
 * mstride and nfft are passed by value as ne10_int32_t.
 * NOTE(review): the name suggests a radix-4 complex-to-real stage in plain C;
 * the exact meaning of fstride/mstride is not visible here -- confirm against
 * the implementation.
 */
242  extern void ne10_radix4_c2r_c (ne10_fft_cpx_float32_t *Fout,
243  const ne10_fft_cpx_float32_t *Fin,
244  const ne10_int32_t fstride,
245  const ne10_int32_t mstride,
246  const ne10_int32_t nfft);
247 
/*
 * ne10_radix8_r2c_c: only declared when NE10_UNROLL_LEVEL > 0.
 * Fout is a writable complex float32 buffer; Fin is read-only; fstride,
 * mstride and nfft are passed by value as ne10_int32_t.
 * NOTE(review): the name suggests a radix-8 real-to-complex stage in plain C;
 * the exact meaning of fstride/mstride is not visible here -- confirm against
 * the implementation.
 */
248  extern void ne10_radix8_r2c_c (ne10_fft_cpx_float32_t *Fout,
249  const ne10_fft_cpx_float32_t *Fin,
250  const ne10_int32_t fstride,
251  const ne10_int32_t mstride,
252  const ne10_int32_t nfft);
253 
/*
 * ne10_radix8_c2r_c: only declared when NE10_UNROLL_LEVEL > 0.
 * Fout is a writable complex float32 buffer; Fin is read-only; fstride,
 * mstride and nfft are passed by value as ne10_int32_t.
 * NOTE(review): the name suggests a radix-8 complex-to-real stage in plain C;
 * the exact meaning of fstride/mstride is not visible here -- confirm against
 * the implementation.
 */
254  extern void ne10_radix8_c2r_c (ne10_fft_cpx_float32_t *Fout,
255  const ne10_fft_cpx_float32_t *Fin,
256  const ne10_int32_t fstride,
257  const ne10_int32_t mstride,
258  const ne10_int32_t nfft);
259 #endif // NE10_UNROLL_LEVEL
260 
261 #ifdef __cplusplus
262 }
263 #endif
264 
265 #endif
void ne10_mixed_radix_fft_backward_int16_scaled_neon(ne10_fft_cpx_int16_t *Fout, ne10_fft_cpx_int16_t *fin, ne10_int32_t *factors, ne10_fft_cpx_int16_t *twiddles, ne10_fft_cpx_int16_t *buffer) asm("ne10_mixed_radix_fft_backward_int16_scaled_neon")
Structure for the 16-bit fixed point FFT function.
Definition: NE10_types.h:294
void ne10_mixed_radix_generic_butterfly_float32_neon(ne10_fft_cpx_float32_t *Fout, const ne10_fft_cpx_float32_t *Fin, const ne10_int32_t *factors, const ne10_fft_cpx_float32_t *twiddles, ne10_fft_cpx_float32_t *buffer, const ne10_int32_t is_scaled)
void ne10_mixed_radix_generic_butterfly_inverse_float32_c(ne10_fft_cpx_float32_t *Fout, const ne10_fft_cpx_float32_t *Fin, const ne10_int32_t *factors, const ne10_fft_cpx_float32_t *twiddles, ne10_fft_cpx_float32_t *buffer, const ne10_int32_t is_scaled)
void ne10_mixed_radix_fft_backward_int32_scaled_neon(ne10_fft_cpx_int32_t *Fout, ne10_fft_cpx_int32_t *fin, ne10_int32_t *factors, ne10_fft_cpx_int32_t *twiddles, ne10_fft_cpx_int32_t *buffer) asm("ne10_mixed_radix_fft_backward_int32_scaled_neon")
void ne10_mixed_radix_fft_forward_int16_scaled_neon(ne10_fft_cpx_int16_t *Fout, ne10_fft_cpx_int16_t *fin, ne10_int32_t *factors, ne10_fft_cpx_int16_t *twiddles, ne10_fft_cpx_int16_t *buffer) asm("ne10_mixed_radix_fft_forward_int16_scaled_neon")
int32_t ne10_int32_t
Definition: NE10_types.h:76
void ne10_mixed_radix_fft_forward_int32_unscaled_neon(ne10_fft_cpx_int32_t *Fout, ne10_fft_cpx_int32_t *fin, ne10_int32_t *factors, ne10_fft_cpx_int32_t *twiddles, ne10_fft_cpx_int32_t *buffer) asm("ne10_mixed_radix_fft_forward_int32_unscaled_neon")
ne10_int32_t ne10_factor(ne10_int32_t n, ne10_int32_t *facbuf, ne10_int32_t ne10_factor_flags)
Definition: NE10_fft.c:71
void ne10_mixed_radix_generic_butterfly_int32_c(ne10_fft_cpx_int32_t *Fout, const ne10_fft_cpx_int32_t *Fin, const ne10_int32_t *factors, const ne10_fft_cpx_int32_t *twiddles, ne10_fft_cpx_int32_t *buffer, const ne10_int32_t scaled_flag)
Generic (forward) FFT function for 32-bit fixed point.
void ne10_mixed_radix_fft_backward_float32_neon(ne10_fft_cpx_float32_t *Fout, ne10_fft_cpx_float32_t *Fin, ne10_int32_t *factors, ne10_fft_cpx_float32_t *twiddles, ne10_fft_cpx_float32_t *buffer) asm("ne10_mixed_radix_fft_backward_float32_neon")
void ne10_mixed_radix_fft_forward_int16_unscaled_neon(ne10_fft_cpx_int16_t *Fout, ne10_fft_cpx_int16_t *fin, ne10_int32_t *factors, ne10_fft_cpx_int16_t *twiddles, ne10_fft_cpx_int16_t *buffer) asm("ne10_mixed_radix_fft_forward_int16_unscaled_neon")
ne10_fft_cpx_int32_t * ne10_fft_generate_twiddles_int32(ne10_fft_cpx_int32_t *twiddles, const ne10_int32_t *factors, const ne10_int32_t nfft)
Definition: NE10_fft.c:246
void ne10_mixed_radix_fft_backward_int16_unscaled_neon(ne10_fft_cpx_int16_t *Fout, ne10_fft_cpx_int16_t *fin, ne10_int32_t *factors, ne10_fft_cpx_int16_t *twiddles, ne10_fft_cpx_int16_t *buffer) asm("ne10_mixed_radix_fft_backward_int16_unscaled_neon")
void ne10_mixed_radix_generic_butterfly_inverse_float32_neon(ne10_fft_cpx_float32_t *Fout, const ne10_fft_cpx_float32_t *Fin, const ne10_int32_t *factors, const ne10_fft_cpx_float32_t *twiddles, ne10_fft_cpx_float32_t *buffer, const ne10_int32_t is_scaled)
void ne10_mixed_radix_fft_backward_int32_unscaled_neon(ne10_fft_cpx_int32_t *Fout, ne10_fft_cpx_int32_t *fin, ne10_int32_t *factors, ne10_fft_cpx_int32_t *twiddles, ne10_fft_cpx_int32_t *buffer) asm("ne10_mixed_radix_fft_backward_int32_unscaled_neon")
ne10_fft_cpx_float32_t * ne10_fft_generate_twiddles_transposed_float32(ne10_fft_cpx_float32_t *twiddles, const ne10_int32_t *factors, const ne10_int32_t nfft)
Definition: NE10_fft.c:330
Structure for the 32-bit fixed point FFT function.
Definition: NE10_types.h:325
void ne10_mixed_radix_generic_butterfly_inverse_int32_neon(ne10_fft_cpx_int32_t *Fout, const ne10_fft_cpx_int32_t *Fin, const ne10_int32_t *factors, const ne10_fft_cpx_int32_t *twiddles, ne10_fft_cpx_int32_t *buffer, const ne10_int32_t scaled_flag)
ne10_fft_cpx_float32_t * ne10_fft_generate_twiddles_float32(ne10_fft_cpx_float32_t *twiddles, const ne10_int32_t *factors, const ne10_int32_t nfft)
Definition: NE10_fft.c:320
void ne10_mixed_radix_generic_butterfly_int32_neon(ne10_fft_cpx_int32_t *Fout, const ne10_fft_cpx_int32_t *Fin, const ne10_int32_t *factors, const ne10_fft_cpx_int32_t *twiddles, ne10_fft_cpx_int32_t *buffer, const ne10_int32_t scaled_flag)
void ne10_mixed_radix_generic_butterfly_float32_c(ne10_fft_cpx_float32_t *Fout, const ne10_fft_cpx_float32_t *Fin, const ne10_int32_t *factors, const ne10_fft_cpx_float32_t *twiddles, ne10_fft_cpx_float32_t *buffer, const ne10_int32_t is_scaled)
void ne10_mixed_radix_fft_forward_int32_scaled_neon(ne10_fft_cpx_int32_t *Fout, ne10_fft_cpx_int32_t *fin, ne10_int32_t *factors, ne10_fft_cpx_int32_t *twiddles, ne10_fft_cpx_int32_t *buffer) asm("ne10_mixed_radix_fft_forward_int32_scaled_neon")
void ne10_mixed_radix_fft_forward_float32_neon(ne10_fft_cpx_float32_t *Fout, ne10_fft_cpx_float32_t *Fin, ne10_int32_t *factors, ne10_fft_cpx_float32_t *twiddles, ne10_fft_cpx_float32_t *buffer) asm("ne10_mixed_radix_fft_forward_float32_neon")
void ne10_mixed_radix_generic_butterfly_inverse_int32_c(ne10_fft_cpx_int32_t *Fout, const ne10_fft_cpx_int32_t *Fin, const ne10_int32_t *factors, const ne10_fft_cpx_int32_t *twiddles, ne10_fft_cpx_int32_t *buffer, const ne10_int32_t scaled_flag)
Generic IFFT function for 32-bit fixed point.