Project Ne10
An open, optimized software library for the ARM architecture.
NE10_fft_int16.neon.c
/*
 * Copyright 2013-16 ARM Limited and Contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of ARM Limited nor the
 *     names of its contributors may be used to endorse or promote products
 *     derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NE10 Library : dsp/NE10_fft_int16.neon.c
 */

#include <arm_neon.h>

#include "NE10_types.h"
#include "NE10_macros.h"
#include "NE10_fft.h"
static inline void ne10_fft2_forward_int16_unscaled (ne10_fft_cpx_int16_t * Fout,
                                                     ne10_fft_cpx_int16_t * Fin)
{
    Fout[0].r = Fin[0].r + Fin[1].r;
    Fout[0].i = Fin[0].i + Fin[1].i;
    Fout[1].r = Fin[0].r - Fin[1].r;
    Fout[1].i = Fin[0].i - Fin[1].i;
}

static inline void ne10_fft2_backward_int16_unscaled (ne10_fft_cpx_int16_t * Fout,
                                                      ne10_fft_cpx_int16_t * Fin)
{
    Fout[0].r = Fin[0].r + Fin[1].r;
    Fout[0].i = Fin[0].i + Fin[1].i;
    Fout[1].r = Fin[0].r - Fin[1].r;
    Fout[1].i = Fin[0].i - Fin[1].i;
}

static inline void ne10_fft2_forward_int16_scaled (ne10_fft_cpx_int16_t * Fout,
                                                   ne10_fft_cpx_int16_t * Fin)
{
    Fout[0].r = (Fin[0].r + Fin[1].r) >> 1;
    Fout[0].i = (Fin[0].i + Fin[1].i) >> 1;
    Fout[1].r = (Fin[0].r - Fin[1].r) >> 1;
    Fout[1].i = (Fin[0].i - Fin[1].i) >> 1;
}

static inline void ne10_fft2_backward_int16_scaled (ne10_fft_cpx_int16_t * Fout,
                                                    ne10_fft_cpx_int16_t * Fin)
{
    Fout[0].r = (Fin[0].r + Fin[1].r) >> 1;
    Fout[0].i = (Fin[0].i + Fin[1].i) >> 1;
    Fout[1].r = (Fin[0].r - Fin[1].r) >> 1;
    Fout[1].i = (Fin[0].i - Fin[1].i) >> 1;
}
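
/*
 * Added illustration (not part of the original file): the "_scaled" variants
 * fold a divide-by-N into the butterflies (>> 1 per radix-2 stage) so that
 * intermediate values stay within the int16 range. A minimal sketch:
 */
#if 0 /* illustration only */
static void ne10_fft2_int16_scaling_demo (void)
{
    ne10_fft_cpx_int16_t in[2] = { {4, 0}, {2, 0} };
    ne10_fft_cpx_int16_t out_unscaled[2], out_scaled[2];

    ne10_fft2_forward_int16_unscaled (out_unscaled, in); /* {6, 0}, {2, 0} */
    ne10_fft2_forward_int16_scaled (out_scaled, in);     /* {3, 0}, {1, 0} */
}
#endif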

static inline void ne10_fft4_forward_int16_unscaled (ne10_fft_cpx_int16_t * Fout,
                                                     ne10_fft_cpx_int16_t * Fin)
{
    ne10_int16_t s0_r, s0_i, s1_r, s1_i, s2_r, s2_i;
    ne10_int16_t tmp_r, tmp_i;

    s2_r = Fin[0].r - Fin[2].r;
    s2_i = Fin[0].i - Fin[2].i;

    tmp_r = Fin[0].r + Fin[2].r;
    tmp_i = Fin[0].i + Fin[2].i;

    s0_r = Fin[1].r + Fin[3].r;
    s0_i = Fin[1].i + Fin[3].i;

    s1_r = Fin[1].r - Fin[3].r;
    s1_i = Fin[1].i - Fin[3].i;

    Fout[2].r = tmp_r - s0_r;
    Fout[2].i = tmp_i - s0_i;
    Fout[0].r = tmp_r + s0_r;
    Fout[0].i = tmp_i + s0_i;

    Fout[1].r = s2_r + s1_i;
    Fout[1].i = s2_i - s1_r;
    Fout[3].r = s2_r - s1_i;
    Fout[3].i = s2_i + s1_r;
}

static inline void ne10_fft4_backward_int16_unscaled (ne10_fft_cpx_int16_t * Fout,
                                                      ne10_fft_cpx_int16_t * Fin)
{
    ne10_int16_t s0_r, s0_i, s1_r, s1_i, s2_r, s2_i;
    ne10_int16_t tmp_r, tmp_i;

    s2_r = Fin[0].r - Fin[2].r;
    s2_i = Fin[0].i - Fin[2].i;

    tmp_r = Fin[0].r + Fin[2].r;
    tmp_i = Fin[0].i + Fin[2].i;

    s0_r = Fin[1].r + Fin[3].r;
    s0_i = Fin[1].i + Fin[3].i;

    s1_r = Fin[1].r - Fin[3].r;
    s1_i = Fin[1].i - Fin[3].i;

    Fout[2].r = tmp_r - s0_r;
    Fout[2].i = tmp_i - s0_i;
    Fout[0].r = tmp_r + s0_r;
    Fout[0].i = tmp_i + s0_i;

    Fout[1].r = s2_r - s1_i;
    Fout[1].i = s2_i + s1_r;
    Fout[3].r = s2_r + s1_i;
    Fout[3].i = s2_i - s1_r;
}

static inline void ne10_fft4_forward_int16_scaled (ne10_fft_cpx_int16_t * Fout,
                                                   ne10_fft_cpx_int16_t * Fin)
{
    ne10_int16_t s0_r, s0_i, s1_r, s1_i, s2_r, s2_i;
    ne10_int16_t tmp_r, tmp_i;

    s2_r = (Fin[0].r - Fin[2].r) >> 2;
    s2_i = (Fin[0].i - Fin[2].i) >> 2;
    tmp_r = (Fin[0].r + Fin[2].r) >> 2;
    tmp_i = (Fin[0].i + Fin[2].i) >> 2;

    s0_r = (Fin[1].r + Fin[3].r) >> 2;
    s0_i = (Fin[1].i + Fin[3].i) >> 2;
    s1_r = (Fin[1].r - Fin[3].r) >> 2;
    s1_i = (Fin[1].i - Fin[3].i) >> 2;

    Fout[2].r = tmp_r - s0_r;
    Fout[2].i = tmp_i - s0_i;
    Fout[0].r = tmp_r + s0_r;
    Fout[0].i = tmp_i + s0_i;

    Fout[1].r = s2_r + s1_i;
    Fout[1].i = s2_i - s1_r;
    Fout[3].r = s2_r - s1_i;
    Fout[3].i = s2_i + s1_r;
}

static inline void ne10_fft4_backward_int16_scaled (ne10_fft_cpx_int16_t * Fout,
                                                    ne10_fft_cpx_int16_t * Fin)
{
    ne10_int16_t s0_r, s0_i, s1_r, s1_i, s2_r, s2_i;
    ne10_int16_t tmp_r, tmp_i;

    s2_r = (Fin[0].r - Fin[2].r) >> 2;
    s2_i = (Fin[0].i - Fin[2].i) >> 2;
    tmp_r = (Fin[0].r + Fin[2].r) >> 2;
    tmp_i = (Fin[0].i + Fin[2].i) >> 2;

    s0_r = (Fin[1].r + Fin[3].r) >> 2;
    s0_i = (Fin[1].i + Fin[3].i) >> 2;
    s1_r = (Fin[1].r - Fin[3].r) >> 2;
    s1_i = (Fin[1].i - Fin[3].i) >> 2;

    Fout[2].r = tmp_r - s0_r;
    Fout[2].i = tmp_i - s0_i;
    Fout[0].r = tmp_r + s0_r;
    Fout[0].i = tmp_i + s0_i;

    Fout[1].r = s2_r - s1_i;
    Fout[1].i = s2_i + s1_r;
    Fout[3].r = s2_r + s1_i;
    Fout[3].i = s2_i - s1_r;
}
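
/*
 * Added illustration (not part of the original file): for the unscaled
 * helpers, a forward transform followed by a backward transform returns the
 * input multiplied by the length N (here 4), the usual unnormalised DFT
 * identity. A minimal sketch:
 */
#if 0 /* illustration only */
static void ne10_fft4_int16_roundtrip_demo (void)
{
    ne10_fft_cpx_int16_t x[4] = { {1, 0}, {2, 0}, {3, 0}, {4, 0} };
    ne10_fft_cpx_int16_t X[4], y[4];

    ne10_fft4_forward_int16_unscaled (X, x);  /* {10,0}, {-2,2}, {-2,0}, {-2,-2} */
    ne10_fft4_backward_int16_unscaled (y, X); /* y[k] == 4 * x[k] */
}
#endif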

static inline void ne10_fft8_forward_int16_unscaled (ne10_fft_cpx_int16_t * Fout,
                                                     ne10_fft_cpx_int16_t * Fin)
{
    ne10_int16_t s0_r, s0_i, s1_r, s1_i, s2_r, s2_i, s3_r, s3_i, s4_r, s4_i, s5_r, s5_i, s6_r, s6_i, s7_r, s7_i;
    ne10_int16_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i;
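    /* Added note: TW_81 is cos(pi/4) = sin(pi/4) = sqrt(2)/2 in Q15 fixed
     * point (23169 / 2^15 ~= 0.7071), the twiddle of the radix-8 butterfly. */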
    const ne10_int16_t TW_81 = 23169;

    s0_r = Fin[0].r + Fin[4].r;
    s0_i = Fin[0].i + Fin[4].i;
    s1_r = Fin[0].r - Fin[4].r;
    s1_i = Fin[0].i - Fin[4].i;
    s2_r = Fin[1].r + Fin[5].r;
    s2_i = Fin[1].i + Fin[5].i;
    s3_r = Fin[1].r - Fin[5].r;
    s3_i = Fin[1].i - Fin[5].i;
    s4_r = Fin[2].r + Fin[6].r;
    s4_i = Fin[2].i + Fin[6].i;
    s5_r = Fin[2].r - Fin[6].r;
    s5_i = Fin[2].i - Fin[6].i;
    s6_r = Fin[3].r + Fin[7].r;
    s6_i = Fin[3].i + Fin[7].i;
    s7_r = Fin[3].r - Fin[7].r;
    s7_i = Fin[3].i - Fin[7].i;

    t0_r = s0_r - s4_r;
    t0_i = s0_i - s4_i;
    t1_r = s0_r + s4_r;
    t1_i = s0_i + s4_i;
    t2_r = s2_r + s6_r;
    t2_i = s2_i + s6_i;
    t3_r = s2_r - s6_r;
    t3_i = s2_i - s6_i;
    Fout[0].r = t1_r + t2_r;
    Fout[0].i = t1_i + t2_i;
    Fout[4].r = t1_r - t2_r;
    Fout[4].i = t1_i - t2_i;
    Fout[2].r = t0_r + t3_i;
    Fout[2].i = t0_i - t3_r;
    Fout[6].r = t0_r - t3_i;
    Fout[6].i = t0_i + t3_r;

    t4_r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (s3_r + s3_i) * TW_81) >> NE10_F2I16_SHIFT);
    t4_i = - (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (s3_r - s3_i) * TW_81) >> NE10_F2I16_SHIFT);
    t5_r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (s7_r - s7_i) * TW_81) >> NE10_F2I16_SHIFT);
    t5_i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (s7_r + s7_i) * TW_81) >> NE10_F2I16_SHIFT);

    t0_r = s1_r - s5_i;
    t0_i = s1_i + s5_r;
    t1_r = s1_r + s5_i;
    t1_i = s1_i - s5_r;
    t2_r = t4_r - t5_r;
    t2_i = t4_i - t5_i;
    t3_r = t4_r + t5_r;
    t3_i = t4_i + t5_i;
    Fout[1].r = t1_r + t2_r;
    Fout[1].i = t1_i + t2_i;
    Fout[5].r = t1_r - t2_r;
    Fout[5].i = t1_i - t2_i;
    Fout[3].r = t0_r + t3_i;
    Fout[3].i = t0_i - t3_r;
    Fout[7].r = t0_r - t3_i;
    Fout[7].i = t0_i + t3_r;
}

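/* Added note: the backward kernels below mirror the forward ones with the
 * signs of the +/- j rotations flipped, i.e. conjugated twiddle factors. */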
static inline void ne10_fft8_backward_int16_unscaled (ne10_fft_cpx_int16_t * Fout,
                                                      ne10_fft_cpx_int16_t * Fin)
{
    ne10_int16_t s0_r, s0_i, s1_r, s1_i, s2_r, s2_i, s3_r, s3_i, s4_r, s4_i, s5_r, s5_i, s6_r, s6_i, s7_r, s7_i;
    ne10_int16_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i;
    const ne10_int16_t TW_81 = 23169;

    s0_r = Fin[0].r + Fin[4].r;
    s0_i = Fin[0].i + Fin[4].i;
    s1_r = Fin[0].r - Fin[4].r;
    s1_i = Fin[0].i - Fin[4].i;
    s2_r = Fin[1].r + Fin[5].r;
    s2_i = Fin[1].i + Fin[5].i;
    s3_r = Fin[1].r - Fin[5].r;
    s3_i = Fin[1].i - Fin[5].i;
    s4_r = Fin[2].r + Fin[6].r;
    s4_i = Fin[2].i + Fin[6].i;
    s5_r = Fin[2].r - Fin[6].r;
    s5_i = Fin[2].i - Fin[6].i;
    s6_r = Fin[3].r + Fin[7].r;
    s6_i = Fin[3].i + Fin[7].i;
    s7_r = Fin[3].r - Fin[7].r;
    s7_i = Fin[3].i - Fin[7].i;

    t0_r = s0_r - s4_r;
    t0_i = s0_i - s4_i;
    t1_r = s0_r + s4_r;
    t1_i = s0_i + s4_i;
    t2_r = s2_r + s6_r;
    t2_i = s2_i + s6_i;
    t3_r = s2_r - s6_r;
    t3_i = s2_i - s6_i;
    Fout[0].r = t1_r + t2_r;
    Fout[0].i = t1_i + t2_i;
    Fout[4].r = t1_r - t2_r;
    Fout[4].i = t1_i - t2_i;
    Fout[2].r = t0_r - t3_i;
    Fout[2].i = t0_i + t3_r;
    Fout[6].r = t0_r + t3_i;
    Fout[6].i = t0_i - t3_r;

    t4_r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (s3_r - s3_i) * TW_81) >> NE10_F2I16_SHIFT);
    t4_i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (s3_r + s3_i) * TW_81) >> NE10_F2I16_SHIFT);
    t5_r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (s7_r + s7_i) * TW_81) >> NE10_F2I16_SHIFT);
    t5_i = - (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (s7_r - s7_i) * TW_81) >> NE10_F2I16_SHIFT);

    t0_r = s1_r + s5_i;
    t0_i = s1_i - s5_r;
    t1_r = s1_r - s5_i;
    t1_i = s1_i + s5_r;
    t2_r = t4_r - t5_r;
    t2_i = t4_i - t5_i;
    t3_r = t4_r + t5_r;
    t3_i = t4_i + t5_i;
    Fout[1].r = t1_r + t2_r;
    Fout[1].i = t1_i + t2_i;
    Fout[5].r = t1_r - t2_r;
    Fout[5].i = t1_i - t2_i;
    Fout[3].r = t0_r - t3_i;
    Fout[3].i = t0_i + t3_r;
    Fout[7].r = t0_r + t3_i;
    Fout[7].i = t0_i - t3_r;
}

static inline void ne10_fft8_forward_int16_scaled (ne10_fft_cpx_int16_t * Fout,
                                                   ne10_fft_cpx_int16_t * Fin)
{
    ne10_int16_t s0_r, s0_i, s1_r, s1_i, s2_r, s2_i, s3_r, s3_i, s4_r, s4_i, s5_r, s5_i, s6_r, s6_i, s7_r, s7_i;
    ne10_int16_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i;
    const ne10_int16_t TW_81 = 23169;

    s0_r = (Fin[0].r + Fin[4].r) >> 3;
    s0_i = (Fin[0].i + Fin[4].i) >> 3;
    s1_r = (Fin[0].r - Fin[4].r) >> 3;
    s1_i = (Fin[0].i - Fin[4].i) >> 3;
    s2_r = (Fin[1].r + Fin[5].r) >> 3;
    s2_i = (Fin[1].i + Fin[5].i) >> 3;
    s3_r = (Fin[1].r - Fin[5].r) >> 3;
    s3_i = (Fin[1].i - Fin[5].i) >> 3;
    s4_r = (Fin[2].r + Fin[6].r) >> 3;
    s4_i = (Fin[2].i + Fin[6].i) >> 3;
    s5_r = (Fin[2].r - Fin[6].r) >> 3;
    s5_i = (Fin[2].i - Fin[6].i) >> 3;
    s6_r = (Fin[3].r + Fin[7].r) >> 3;
    s6_i = (Fin[3].i + Fin[7].i) >> 3;
    s7_r = (Fin[3].r - Fin[7].r) >> 3;
    s7_i = (Fin[3].i - Fin[7].i) >> 3;

    t0_r = s0_r - s4_r;
    t0_i = s0_i - s4_i;
    t1_r = s0_r + s4_r;
    t1_i = s0_i + s4_i;
    t2_r = s2_r + s6_r;
    t2_i = s2_i + s6_i;
    t3_r = s2_r - s6_r;
    t3_i = s2_i - s6_i;
    Fout[0].r = t1_r + t2_r;
    Fout[0].i = t1_i + t2_i;
    Fout[4].r = t1_r - t2_r;
    Fout[4].i = t1_i - t2_i;
    Fout[2].r = t0_r + t3_i;
    Fout[2].i = t0_i - t3_r;
    Fout[6].r = t0_r - t3_i;
    Fout[6].i = t0_i + t3_r;

    t4_r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (s3_r + s3_i) * TW_81) >> NE10_F2I16_SHIFT);
    t4_i = - (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (s3_r - s3_i) * TW_81) >> NE10_F2I16_SHIFT);
    t5_r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (s7_r - s7_i) * TW_81) >> NE10_F2I16_SHIFT);
    t5_i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (s7_r + s7_i) * TW_81) >> NE10_F2I16_SHIFT);

    t0_r = s1_r - s5_i;
    t0_i = s1_i + s5_r;
    t1_r = s1_r + s5_i;
    t1_i = s1_i - s5_r;
    t2_r = t4_r - t5_r;
    t2_i = t4_i - t5_i;
    t3_r = t4_r + t5_r;
    t3_i = t4_i + t5_i;
    Fout[1].r = t1_r + t2_r;
    Fout[1].i = t1_i + t2_i;
    Fout[5].r = t1_r - t2_r;
    Fout[5].i = t1_i - t2_i;
    Fout[3].r = t0_r + t3_i;
    Fout[3].i = t0_i - t3_r;
    Fout[7].r = t0_r - t3_i;
    Fout[7].i = t0_i + t3_r;
}

static inline void ne10_fft8_backward_int16_scaled (ne10_fft_cpx_int16_t * Fout,
                                                    ne10_fft_cpx_int16_t * Fin)
{
    ne10_int16_t s0_r, s0_i, s1_r, s1_i, s2_r, s2_i, s3_r, s3_i, s4_r, s4_i, s5_r, s5_i, s6_r, s6_i, s7_r, s7_i;
    ne10_int16_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i;
    const ne10_int16_t TW_81 = 23169;

    s0_r = (Fin[0].r + Fin[4].r) >> 3;
    s0_i = (Fin[0].i + Fin[4].i) >> 3;
    s1_r = (Fin[0].r - Fin[4].r) >> 3;
    s1_i = (Fin[0].i - Fin[4].i) >> 3;
    s2_r = (Fin[1].r + Fin[5].r) >> 3;
    s2_i = (Fin[1].i + Fin[5].i) >> 3;
    s3_r = (Fin[1].r - Fin[5].r) >> 3;
    s3_i = (Fin[1].i - Fin[5].i) >> 3;
    s4_r = (Fin[2].r + Fin[6].r) >> 3;
    s4_i = (Fin[2].i + Fin[6].i) >> 3;
    s5_r = (Fin[2].r - Fin[6].r) >> 3;
    s5_i = (Fin[2].i - Fin[6].i) >> 3;
    s6_r = (Fin[3].r + Fin[7].r) >> 3;
    s6_i = (Fin[3].i + Fin[7].i) >> 3;
    s7_r = (Fin[3].r - Fin[7].r) >> 3;
    s7_i = (Fin[3].i - Fin[7].i) >> 3;

    t0_r = s0_r - s4_r;
    t0_i = s0_i - s4_i;
    t1_r = s0_r + s4_r;
    t1_i = s0_i + s4_i;
    t2_r = s2_r + s6_r;
    t2_i = s2_i + s6_i;
    t3_r = s2_r - s6_r;
    t3_i = s2_i - s6_i;
    Fout[0].r = t1_r + t2_r;
    Fout[0].i = t1_i + t2_i;
    Fout[4].r = t1_r - t2_r;
    Fout[4].i = t1_i - t2_i;
    Fout[2].r = t0_r - t3_i;
    Fout[2].i = t0_i + t3_r;
    Fout[6].r = t0_r + t3_i;
    Fout[6].i = t0_i - t3_r;

    t4_r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (s3_r - s3_i) * TW_81) >> NE10_F2I16_SHIFT);
    t4_i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (s3_r + s3_i) * TW_81) >> NE10_F2I16_SHIFT);
    t5_r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (s7_r + s7_i) * TW_81) >> NE10_F2I16_SHIFT);
    t5_i = - (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (s7_r - s7_i) * TW_81) >> NE10_F2I16_SHIFT);

    t0_r = s1_r + s5_i;
    t0_i = s1_i - s5_r;
    t1_r = s1_r - s5_i;
    t1_i = s1_i + s5_r;
    t2_r = t4_r - t5_r;
    t2_i = t4_i - t5_i;
    t3_r = t4_r + t5_r;
    t3_i = t4_i + t5_i;
    Fout[1].r = t1_r + t2_r;
    Fout[1].i = t1_i + t2_i;
    Fout[5].r = t1_r - t2_r;
    Fout[5].i = t1_i - t2_i;
    Fout[3].r = t0_r - t3_i;
    Fout[3].i = t0_i + t3_r;
    Fout[7].r = t0_r + t3_i;
    Fout[7].i = t0_i - t3_r;
}
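
/* Added note: post-processing ("split") step of the real-to-complex FFT.
 * It turns the ncfft-point complex FFT of the packed real input into the
 * spectrum of the 2*ncfft-point real sequence, writing dst[0..ncfft]. */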
static void ne10_fft_split_r2c_1d_int16_neon (ne10_fft_cpx_int16_t *dst,
                                              const ne10_fft_cpx_int16_t *src,
                                              ne10_fft_cpx_int16_t *twiddles,
                                              ne10_int32_t ncfft,
                                              ne10_int32_t scaled_flag)
{
    ne10_int32_t k;
    ne10_int32_t count = ncfft / 2;
    ne10_fft_cpx_int16_t fpnk, fpk, f1k, f2k, tw, tdc;
    int16x8x2_t q2_fpk, q2_fpnk, q2_tw, q2_dst, q2_dst2;
    int16x8_t q_fpnk_r, q_fpnk_i;
    int16x8_t q_f1k_r, q_f1k_i, q_f2k_r, q_f2k_i;
    int16x8_t q_tw_r, q_tw_i;
    int16x8_t q_tmp0, q_tmp1, q_tmp2, q_tmp3;
    int16x8_t q_dst2_r, q_dst2_i;
    int16_t *p_src, *p_src2, *p_dst, *p_dst2, *p_twiddles;

    tdc.r = src[0].r;
    tdc.i = src[0].i;

    if (scaled_flag)
        NE10_F2I16_FIXDIV (tdc, 2);

    dst[0].r = tdc.r + tdc.i;
    dst[ncfft].r = tdc.r - tdc.i;
    dst[ncfft].i = dst[0].i = 0;

    if (count >= 8)
    {
        if (scaled_flag)
        {
            for (k = 1; k <= count; k += 8)
            {
                p_src = (int16_t*) (& (src[k]));
                p_src2 = (int16_t*) (& (src[ncfft - k - 7]));
                p_twiddles = (int16_t*) (& (twiddles[k - 1]));
                p_dst = (int16_t*) (& (dst[k]));
                p_dst2 = (int16_t*) (& (dst[ncfft - k - 7]));

                q2_fpk = vld2q_s16 (p_src);
                q2_fpnk = vld2q_s16 (p_src2);

                q2_tw = vld2q_s16 (p_twiddles);
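                /* Added note: the vrev32q/vrev64q/vcombine sequence below
                 * reverses the eight int16 lanes, so q_fpnk pairs
                 * src[ncfft - k] .. src[ncfft - k - 7] (descending index)
                 * with src[k] .. src[k + 7]. */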
                q2_fpnk.val[0] = vrev32q_s16 (q2_fpnk.val[0]);
                q2_fpnk.val[1] = vrev32q_s16 (q2_fpnk.val[1]);
                q2_fpnk.val[0] = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q2_fpnk.val[0])));
                q2_fpnk.val[1] = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q2_fpnk.val[1])));
                q_fpnk_r = vcombine_s16 (vget_high_s16 (q2_fpnk.val[0]), vget_low_s16 (q2_fpnk.val[0]));
                q_fpnk_i = vcombine_s16 (vget_high_s16 (q2_fpnk.val[1]), vget_low_s16 (q2_fpnk.val[1]));
                q_fpnk_i = vnegq_s16 (q_fpnk_i);

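                /* Added note: vhaddq/vhsubq are halving add/subtract
                 * ((a +/- b) >> 1); they implement the divide-by-2 that the
                 * scalar fallback applies via NE10_F2I16_FIXDIV and the
                 * final >> 1. */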
                q_f1k_r = vhaddq_s16 (q2_fpk.val[0], q_fpnk_r);
                q_f1k_i = vhaddq_s16 (q2_fpk.val[1], q_fpnk_i);

                q_f2k_r = vhsubq_s16 (q2_fpk.val[0], q_fpnk_r);
                q_f2k_i = vhsubq_s16 (q2_fpk.val[1], q_fpnk_i);

                q_tmp0 = vqdmulhq_s16 (q_f2k_r, q2_tw.val[0]);
                q_tmp1 = vqdmulhq_s16 (q_f2k_i, q2_tw.val[1]);
                q_tmp2 = vqdmulhq_s16 (q_f2k_r, q2_tw.val[1]);
                q_tmp3 = vqdmulhq_s16 (q_f2k_i, q2_tw.val[0]);
                q_tw_r = vsubq_s16 (q_tmp0, q_tmp1);
                q_tw_i = vaddq_s16 (q_tmp2, q_tmp3);

                q_dst2_r = vhsubq_s16 (q_f1k_r, q_tw_r);
                q_dst2_i = vhsubq_s16 (q_tw_i, q_f1k_i);
                q2_dst.val[0] = vhaddq_s16 (q_f1k_r, q_tw_r);
                q2_dst.val[1] = vhaddq_s16 (q_f1k_i, q_tw_i);
                q_dst2_r = vrev32q_s16 (q_dst2_r);
                q_dst2_i = vrev32q_s16 (q_dst2_i);
                q_dst2_r = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q_dst2_r)));
                q_dst2_i = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q_dst2_i)));
                q2_dst2.val[0] = vcombine_s16 (vget_high_s16 (q_dst2_r), vget_low_s16 (q_dst2_r));
                q2_dst2.val[1] = vcombine_s16 (vget_high_s16 (q_dst2_i), vget_low_s16 (q_dst2_i));
                vst2q_s16 (p_dst, q2_dst);
                vst2q_s16 (p_dst2, q2_dst2);
            }
        }
        else
        {
            for (k = 1; k <= count; k += 8)
            {
                p_src = (int16_t*) (& (src[k]));
                p_src2 = (int16_t*) (& (src[ncfft - k - 7]));
                p_twiddles = (int16_t*) (& (twiddles[k - 1]));
                p_dst = (int16_t*) (& (dst[k]));
                p_dst2 = (int16_t*) (& (dst[ncfft - k - 7]));

                q2_fpk = vld2q_s16 (p_src);
                q2_fpnk = vld2q_s16 (p_src2);

                q2_tw = vld2q_s16 (p_twiddles);
                q2_fpnk.val[0] = vrev32q_s16 (q2_fpnk.val[0]);
                q2_fpnk.val[1] = vrev32q_s16 (q2_fpnk.val[1]);
                q2_fpnk.val[0] = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q2_fpnk.val[0])));
                q2_fpnk.val[1] = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q2_fpnk.val[1])));
                q_fpnk_r = vcombine_s16 (vget_high_s16 (q2_fpnk.val[0]), vget_low_s16 (q2_fpnk.val[0]));
                q_fpnk_i = vcombine_s16 (vget_high_s16 (q2_fpnk.val[1]), vget_low_s16 (q2_fpnk.val[1]));
                q_fpnk_i = vnegq_s16 (q_fpnk_i);

                q_f1k_r = vaddq_s16 (q2_fpk.val[0], q_fpnk_r);
                q_f1k_i = vaddq_s16 (q2_fpk.val[1], q_fpnk_i);

                q_f2k_r = vsubq_s16 (q2_fpk.val[0], q_fpnk_r);
                q_f2k_i = vsubq_s16 (q2_fpk.val[1], q_fpnk_i);

                q_tmp0 = vqdmulhq_s16 (q_f2k_r, q2_tw.val[0]);
                q_tmp1 = vqdmulhq_s16 (q_f2k_i, q2_tw.val[1]);
                q_tmp2 = vqdmulhq_s16 (q_f2k_r, q2_tw.val[1]);
                q_tmp3 = vqdmulhq_s16 (q_f2k_i, q2_tw.val[0]);
                q_tw_r = vsubq_s16 (q_tmp0, q_tmp1);
                q_tw_i = vaddq_s16 (q_tmp2, q_tmp3);

                q_dst2_r = vhsubq_s16 (q_f1k_r, q_tw_r);
                q_dst2_i = vhsubq_s16 (q_tw_i, q_f1k_i);
                q2_dst.val[0] = vhaddq_s16 (q_f1k_r, q_tw_r);
                q2_dst.val[1] = vhaddq_s16 (q_f1k_i, q_tw_i);
                q_dst2_r = vrev32q_s16 (q_dst2_r);
                q_dst2_i = vrev32q_s16 (q_dst2_i);
                q_dst2_r = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q_dst2_r)));
                q_dst2_i = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q_dst2_i)));
                q2_dst2.val[0] = vcombine_s16 (vget_high_s16 (q_dst2_r), vget_low_s16 (q_dst2_r));
                q2_dst2.val[1] = vcombine_s16 (vget_high_s16 (q_dst2_i), vget_low_s16 (q_dst2_i));
                vst2q_s16 (p_dst, q2_dst);
                vst2q_s16 (p_dst2, q2_dst2);
            }
        }
    }
    else
    {
        for (k = 1; k <= ncfft / 2; ++k)
        {
            fpk = src[k];
            fpnk.r = src[ncfft - k].r;
            fpnk.i = - src[ncfft - k].i;
            if (scaled_flag)
            {
                NE10_F2I16_FIXDIV (fpk, 2);
                NE10_F2I16_FIXDIV (fpnk, 2);
            }

            f1k.r = fpk.r + fpnk.r;
            f1k.i = fpk.i + fpnk.i;

            f2k.r = fpk.r - fpnk.r;
            f2k.i = fpk.i - fpnk.i;

            tw.r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) f2k.r * (twiddles[k - 1]).r
                                      - (NE10_F2I16_SAMPPROD) f2k.i * (twiddles[k - 1]).i) >> NE10_F2I16_SHIFT);
            tw.i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) f2k.r * (twiddles[k - 1]).i
                                      + (NE10_F2I16_SAMPPROD) f2k.i * (twiddles[k - 1]).r) >> NE10_F2I16_SHIFT);

            dst[k].r = (f1k.r + tw.r) >> 1;
            dst[k].i = (f1k.i + tw.i) >> 1;
            dst[ncfft - k].r = (f1k.r - tw.r) >> 1;
            dst[ncfft - k].i = (tw.i - f1k.i) >> 1;
        }
    }
}
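/* Added note: inverse of the split step above. It recombines the
 * conjugate-symmetric spectrum src[0..ncfft] into the ncfft-point complex
 * sequence that the backward complex FFT turns back into 2*ncfft real
 * samples. */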
static void ne10_fft_split_c2r_1d_int16_neon (ne10_fft_cpx_int16_t *dst,
                                              const ne10_fft_cpx_int16_t *src,
                                              ne10_fft_cpx_int16_t *twiddles,
                                              ne10_int32_t ncfft,
                                              ne10_int32_t scaled_flag)
{
    ne10_int32_t k;
    ne10_int32_t count = ncfft / 2;
    ne10_fft_cpx_int16_t fk, fnkc, fek, fok, tmp;
    int16x8x2_t q2_fk, q2_fnkc, q2_tw, q2_dst, q2_dst2;
    int16x8_t q_fnkc_r, q_fnkc_i;
    int16x8_t q_fek_r, q_fek_i, q_fok_r, q_fok_i;
    int16x8_t q_tmp0, q_tmp1, q_tmp2, q_tmp3;
    int16x8_t q_dst2_r, q_dst2_i;
    int16_t *p_src, *p_src2, *p_dst, *p_dst2, *p_twiddles;

    dst[0].r = src[0].r + src[ncfft].r;
    dst[0].i = src[0].r - src[ncfft].r;

    if (scaled_flag)
        NE10_F2I16_FIXDIV (dst[0], 2);

    if (count >= 8)
    {
        if (scaled_flag)
        {
            for (k = 1; k <= count; k += 8)
            {
                p_src = (int16_t*) (& (src[k]));
                p_src2 = (int16_t*) (& (src[ncfft - k - 7]));
                p_twiddles = (int16_t*) (& (twiddles[k - 1]));
                p_dst = (int16_t*) (& (dst[k]));
                p_dst2 = (int16_t*) (& (dst[ncfft - k - 7]));

                q2_fk = vld2q_s16 (p_src);
                q2_fnkc = vld2q_s16 (p_src2);
                q2_tw = vld2q_s16 (p_twiddles);
                q2_fnkc.val[0] = vrev32q_s16 (q2_fnkc.val[0]);
                q2_fnkc.val[1] = vrev32q_s16 (q2_fnkc.val[1]);
                q2_fnkc.val[0] = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q2_fnkc.val[0])));
                q2_fnkc.val[1] = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q2_fnkc.val[1])));
                q_fnkc_r = vcombine_s16 (vget_high_s16 (q2_fnkc.val[0]), vget_low_s16 (q2_fnkc.val[0]));
                q_fnkc_i = vcombine_s16 (vget_high_s16 (q2_fnkc.val[1]), vget_low_s16 (q2_fnkc.val[1]));
                q_fnkc_i = vnegq_s16 (q_fnkc_i);

                q_fek_r = vhaddq_s16 (q2_fk.val[0], q_fnkc_r);
                q_fek_i = vhaddq_s16 (q2_fk.val[1], q_fnkc_i);
                q_tmp0 = vhsubq_s16 (q2_fk.val[0], q_fnkc_r);
                q_tmp1 = vhsubq_s16 (q2_fk.val[1], q_fnkc_i);

                q_fok_r = vqdmulhq_s16 (q_tmp0, q2_tw.val[0]);
                q_fok_i = vqdmulhq_s16 (q_tmp1, q2_tw.val[0]);
                q_tmp2 = vqdmulhq_s16 (q_tmp1, q2_tw.val[1]);
                q_tmp3 = vqdmulhq_s16 (q_tmp0, q2_tw.val[1]);
                q_fok_r = vaddq_s16 (q_fok_r, q_tmp2);
                q_fok_i = vsubq_s16 (q_fok_i, q_tmp3);

                q_dst2_r = vsubq_s16 (q_fek_r, q_fok_r);
                q_dst2_i = vsubq_s16 (q_fok_i, q_fek_i);
                q2_dst.val[0] = vaddq_s16 (q_fek_r, q_fok_r);
                q2_dst.val[1] = vaddq_s16 (q_fek_i, q_fok_i);
                q_dst2_r = vrev32q_s16 (q_dst2_r);
                q_dst2_i = vrev32q_s16 (q_dst2_i);
                q_dst2_r = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q_dst2_r)));
                q_dst2_i = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q_dst2_i)));
                q2_dst2.val[0] = vcombine_s16 (vget_high_s16 (q_dst2_r), vget_low_s16 (q_dst2_r));
                q2_dst2.val[1] = vcombine_s16 (vget_high_s16 (q_dst2_i), vget_low_s16 (q_dst2_i));
                vst2q_s16 (p_dst, q2_dst);
                vst2q_s16 (p_dst2, q2_dst2);
            }
        }
        else
        {
            for (k = 1; k <= count; k += 8)
            {
                p_src = (int16_t*) (& (src[k]));
                p_src2 = (int16_t*) (& (src[ncfft - k - 7]));
                p_twiddles = (int16_t*) (& (twiddles[k - 1]));
                p_dst = (int16_t*) (& (dst[k]));
                p_dst2 = (int16_t*) (& (dst[ncfft - k - 7]));

                q2_fk = vld2q_s16 (p_src);
                q2_fnkc = vld2q_s16 (p_src2);
                q2_tw = vld2q_s16 (p_twiddles);
                q2_fnkc.val[0] = vrev32q_s16 (q2_fnkc.val[0]);
                q2_fnkc.val[1] = vrev32q_s16 (q2_fnkc.val[1]);
                q2_fnkc.val[0] = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q2_fnkc.val[0])));
                q2_fnkc.val[1] = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q2_fnkc.val[1])));
                q_fnkc_r = vcombine_s16 (vget_high_s16 (q2_fnkc.val[0]), vget_low_s16 (q2_fnkc.val[0]));
                q_fnkc_i = vcombine_s16 (vget_high_s16 (q2_fnkc.val[1]), vget_low_s16 (q2_fnkc.val[1]));
                q_fnkc_i = vnegq_s16 (q_fnkc_i);

                q_fek_r = vaddq_s16 (q2_fk.val[0], q_fnkc_r);
                q_fek_i = vaddq_s16 (q2_fk.val[1], q_fnkc_i);
                q_tmp0 = vsubq_s16 (q2_fk.val[0], q_fnkc_r);
                q_tmp1 = vsubq_s16 (q2_fk.val[1], q_fnkc_i);

                q_fok_r = vqdmulhq_s16 (q_tmp0, q2_tw.val[0]);
                q_fok_i = vqdmulhq_s16 (q_tmp1, q2_tw.val[0]);
                q_tmp2 = vqdmulhq_s16 (q_tmp1, q2_tw.val[1]);
                q_tmp3 = vqdmulhq_s16 (q_tmp0, q2_tw.val[1]);
                q_fok_r = vaddq_s16 (q_fok_r, q_tmp2);
                q_fok_i = vsubq_s16 (q_fok_i, q_tmp3);

                q_dst2_r = vsubq_s16 (q_fek_r, q_fok_r);
                q_dst2_i = vsubq_s16 (q_fok_i, q_fek_i);
                q2_dst.val[0] = vaddq_s16 (q_fek_r, q_fok_r);
                q2_dst.val[1] = vaddq_s16 (q_fek_i, q_fok_i);
                q_dst2_r = vrev32q_s16 (q_dst2_r);
                q_dst2_i = vrev32q_s16 (q_dst2_i);
                q_dst2_r = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q_dst2_r)));
                q_dst2_i = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q_dst2_i)));
                q2_dst2.val[0] = vcombine_s16 (vget_high_s16 (q_dst2_r), vget_low_s16 (q_dst2_r));
                q2_dst2.val[1] = vcombine_s16 (vget_high_s16 (q_dst2_i), vget_low_s16 (q_dst2_i));
                vst2q_s16 (p_dst, q2_dst);
                vst2q_s16 (p_dst2, q2_dst2);
            }
        }
    }
    else
    {
        for (k = 1; k <= ncfft / 2; k++)
        {
            fk = src[k];
            fnkc.r = src[ncfft - k].r;
            fnkc.i = -src[ncfft - k].i;
            if (scaled_flag)
            {
                NE10_F2I16_FIXDIV (fk, 2);
                NE10_F2I16_FIXDIV (fnkc, 2);
            }

            fek.r = fk.r + fnkc.r;
            fek.i = fk.i + fnkc.i;

            tmp.r = fk.r - fnkc.r;
            tmp.i = fk.i - fnkc.i;

            fok.r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) tmp.r * (twiddles[k - 1]).r
                                       + (NE10_F2I16_SAMPPROD) tmp.i * (twiddles[k - 1]).i) >> NE10_F2I16_SHIFT);
            fok.i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) tmp.i * (twiddles[k - 1]).r
                                       - (NE10_F2I16_SAMPPROD) tmp.r * (twiddles[k - 1]).i) >> NE10_F2I16_SHIFT);

            dst[k].r = fek.r + fok.r;
            dst[k].i = fek.i + fok.i;

            dst[ncfft - k].r = fek.r - fok.r;
            dst[ncfft - k].i = fok.i - fek.i;
        }
    }
}

/**
 * Specific implementation of ne10_fft_c2c_1d_int16 using NEON SIMD capabilities.
 */
void ne10_fft_c2c_1d_int16_neon (ne10_fft_cpx_int16_t *fout,
                                 ne10_fft_cpx_int16_t *fin,
                                 ne10_fft_cfg_int16_t cfg,
                                 ne10_int32_t inverse_fft,
                                 ne10_int32_t scaled_flag)
{
    if (scaled_flag)
    {
        if (inverse_fft)
        {
            switch (cfg->nfft)
            {
            case 1:
                fout[0] = fin[0];
                break;
            case 2:
                ne10_fft2_backward_int16_scaled (fout, fin);
                break;
            case 4:
                ne10_fft4_backward_int16_scaled (fout, fin);
                break;
            case 8:
                ne10_fft8_backward_int16_scaled (fout, fin);
                break;
            default:
                ne10_mixed_radix_fft_backward_int16_scaled_neon (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer);
                break;
            }
        }
        else
        {
            switch (cfg->nfft)
            {
            case 1:
                fout[0] = fin[0];
                break;
            case 2:
                ne10_fft2_forward_int16_scaled (fout, fin);
                break;
            case 4:
                ne10_fft4_forward_int16_scaled (fout, fin);
                break;
            case 8:
                ne10_fft8_forward_int16_scaled (fout, fin);
                break;
            default:
                ne10_mixed_radix_fft_forward_int16_scaled_neon (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer);
                break;
            }
        }
    }
    else
    {
        if (inverse_fft)
        {
            switch (cfg->nfft)
            {
            case 1:
                fout[0] = fin[0];
                break;
            case 2:
                ne10_fft2_backward_int16_unscaled (fout, fin);
                break;
            case 4:
                ne10_fft4_backward_int16_unscaled (fout, fin);
                break;
            case 8:
                ne10_fft8_backward_int16_unscaled (fout, fin);
                break;
            default:
                ne10_mixed_radix_fft_backward_int16_unscaled_neon (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer);
                break;
            }
        }
        else
        {
            switch (cfg->nfft)
            {
            case 1:
                fout[0] = fin[0];
                break;
            case 2:
                ne10_fft2_forward_int16_unscaled (fout, fin);
                break;
            case 4:
                ne10_fft4_forward_int16_unscaled (fout, fin);
                break;
            case 8:
                ne10_fft8_forward_int16_unscaled (fout, fin);
                break;
            default:
                ne10_mixed_radix_fft_forward_int16_unscaled_neon (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer);
                break;
            }
        }
    }
}

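/*
 * Added usage sketch (not part of the original file), assuming the standard
 * Ne10 helpers ne10_fft_alloc_c2c_int16 / ne10_fft_destroy_c2c_int16
 * declared in NE10_dsp.h:
 */
#if 0 /* illustration only */
#include "NE10_dsp.h"

static void c2c_int16_usage_sketch (void)
{
    ne10_fft_cpx_int16_t in[16] = { { 1024, 0 } }; /* remaining samples zero */
    ne10_fft_cpx_int16_t out[16];
    ne10_fft_cfg_int16_t cfg = ne10_fft_alloc_c2c_int16 (16);

    if (cfg != NULL)
    {
        /* forward (inverse_fft = 0), scaled (scaled_flag = 1): the output is
         * divided by nfft stage-by-stage to avoid int16 overflow */
        ne10_fft_c2c_1d_int16_neon (out, in, cfg, 0, 1);
        ne10_fft_destroy_c2c_int16 (cfg);
    }
}
#endif
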
/**
 * Specific implementation of ne10_fft_r2c_1d_int16 using NEON SIMD capabilities.
 */
void ne10_fft_r2c_1d_int16_neon (ne10_fft_cpx_int16_t *fout,
                                 ne10_int16_t *fin,
                                 ne10_fft_r2c_cfg_int16_t cfg,
                                 ne10_int32_t scaled_flag)
{
    ne10_fft_cpx_int16_t * tmpbuf1 = cfg->buffer;
    ne10_fft_cpx_int16_t * tmpbuf2 = cfg->buffer + cfg->ncfft;
    ne10_fft_state_int16_t c2c_state;

    c2c_state.nfft = cfg->ncfft;
    c2c_state.factors = cfg->factors;
    c2c_state.twiddles = cfg->twiddles;
    c2c_state.buffer = tmpbuf2;

    ne10_fft_c2c_1d_int16_neon (tmpbuf1, (ne10_fft_cpx_int16_t*) fin, &c2c_state, 0, scaled_flag);
    ne10_fft_split_r2c_1d_int16_neon (fout, tmpbuf1, cfg->super_twiddles, cfg->ncfft, scaled_flag);
}

/**
 * Specific implementation of ne10_fft_c2r_1d_int16 using NEON SIMD capabilities.
 */
void ne10_fft_c2r_1d_int16_neon (ne10_int16_t *fout,
                                 ne10_fft_cpx_int16_t *fin,
                                 ne10_fft_r2c_cfg_int16_t cfg,
                                 ne10_int32_t scaled_flag)
{
    ne10_fft_cpx_int16_t * tmpbuf1 = cfg->buffer;
    ne10_fft_cpx_int16_t * tmpbuf2 = cfg->buffer + cfg->ncfft;
    ne10_fft_state_int16_t c2c_state;

    c2c_state.nfft = cfg->ncfft;
    c2c_state.factors = cfg->factors;
    c2c_state.twiddles = cfg->twiddles;
    c2c_state.buffer = tmpbuf2;

    ne10_fft_split_c2r_1d_int16_neon (tmpbuf1, fin, cfg->super_twiddles, cfg->ncfft, scaled_flag);
    ne10_fft_c2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) fout, tmpbuf1, &c2c_state, 1, scaled_flag);
}
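
/*
 * Added usage sketch (not part of the original file), assuming the Ne10
 * helpers ne10_fft_alloc_r2c_int16 / ne10_fft_destroy_r2c_int16 from
 * NE10_dsp.h:
 */
#if 0 /* illustration only */
#include "NE10_dsp.h"

static void r2c_int16_usage_sketch (void)
{
    ne10_int16_t time_domain[32] = { 0 };  /* real input, nfft = 32 */
    ne10_fft_cpx_int16_t freq_domain[17];  /* nfft / 2 + 1 spectrum bins */
    ne10_fft_r2c_cfg_int16_t cfg = ne10_fft_alloc_r2c_int16 (32);

    if (cfg != NULL)
    {
        ne10_fft_r2c_1d_int16_neon (freq_domain, time_domain, cfg, 1);
        ne10_fft_c2r_1d_int16_neon (time_domain, freq_domain, cfg, 1);
        ne10_fft_destroy_r2c_int16 (cfg);
    }
}
#endif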